diff -Nru solr-1.4.0+ds1/build.xml solr-3.3/build.xml
--- solr-1.4.0+ds1/build.xml	2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/build.xml	2011-07-05 07:39:53.000000000 +0000
@@ -1,924 +1,112 @@
[The XML markup of this hunk was lost in extraction; only the recoverable text content of the two build files is shown below, in its original order.]
- lib/lucene-core-${lucene_version}.jar Missing
-
- lib/lucene-core-${lucene_version}.jar does not exist.
- This will cause problems with m2-deploy later, so fail fast now.
-
- Probable cause: lucene jars were upgraded w/o modifying the
- 'lucene_version' property in common-build.xml
-
- <!ENTITY solr.specversion "${specversion}">
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
- This task requires that the property 'stub.src.path' be set.
-
- It must contain a "path" listing directories containing source
- files that this task should use when looking for classes that
- need factories created, the format is platform specific --
- typically it is colon separated in Unix, semi-colon separated
- on windows, ie:
-
-   ant stub-factories -Dstub.src.path="./src:../lucene/contrib:../lucene/src/java"
-
- FYI: The file ${stub.list} contains a list of classes
- that seem to need stub factories. (if java files can be found to
- use as guides for creating them).
- Tests failed!
- Clover not found. Code coverage reports disabled.
-
- ##################################################################
- Clover not found.
- Please make sure clover.jar is in ANT_HOME/lib, or made available
- to Ant using other mechanisms like -lib or CLASSPATH.
- ##################################################################
- See ${example}/README.txt for how to run the Solr example configuration.
+ SUCCESS: You must right-click your project and choose Refresh
+
+ To complete IntelliJ IDEA setup, you must manually configure
+ Project Structure | Project | Project SDK.
diff -Nru solr-1.4.0+ds1/CHANGES.txt solr-3.3/CHANGES.txt
--- solr-1.4.0+ds1/CHANGES.txt	2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/CHANGES.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,2011 +0,0 @@
[Every line of this hunk is a deletion: the 3.3 packaging removes CHANGES.txt entirely. The deleted file's text follows, reflowed for readability.]

Apache Solr Release Notes

Introduction
------------
Apache Solr is an open source enterprise search server based on the Apache Lucene Java
search library, with XML/HTTP and JSON APIs, hit highlighting, faceted search,
caching, replication, and a web administration interface. It runs in a Java
servlet container such as Tomcat.

See http://lucene.apache.org/solr for more information.


Getting Started
---------------
You need a Java 1.5 VM or later installed.
In this release, there is an example Solr server including a bundled
servlet container in the directory named "example".
See the tutorial at http://lucene.apache.org/solr/tutorial.html


$Id: CHANGES.txt 832897 2009-11-04 23:05:48Z yonik $


================== Release 1.4.0 ==================
Release Date: See http://lucene.apache.org/solr for the official release date.

Upgrading from Solr 1.3
-----------------------

There is a new default faceting algorithm for multiValued fields that should be
faster for most cases. One can revert to the previous algorithm (which has
also been improved somewhat) by adding facet.method=enum to the request.

Searching and sorting is now done on a per-segment basis, meaning that
the FieldCache entries used for sorting and for function queries are
created and used per-segment and can be reused for segments that don't
change between index updates. While generally beneficial, this can lead
to increased memory usage over 1.3 in certain scenarios:
  1) A single valued field that was used for both sorting and faceting
in 1.3 would have used the same top level FieldCache entry. In 1.4,
sorting will use entries at the segment level while faceting will still
use entries at the top reader level, leading to increased memory usage.
  2) Certain function queries such as ord() and rord() require a top level
FieldCache instance and can thus lead to increased memory usage. Consider
replacing ord() and rord() with alternatives, such as function queries
based on ms() for date boosting.

If you use custom Tokenizer or TokenFilter components in a chain specified in
schema.xml, they must support reusability. If your Tokenizer or TokenFilter
maintains state, it should implement reset(). If your TokenFilterFactory does
not return a subclass of TokenFilter, then it should implement reset() and call
reset() on its input TokenStream. TokenizerFactory implementations must
now return a Tokenizer rather than a TokenStream.
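
For instance, a stateful filter can satisfy this reuse contract by clearing its own state in reset(), roughly as follows (a minimal sketch against the Lucene 2.9-era TokenStream API; CountingFilter and its counter are hypothetical illustrations, not Solr classes):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;

    // Hypothetical stateful filter showing the reuse contract.
    public final class CountingFilter extends TokenFilter {
      private int count; // per-stream state that must be cleared on reuse

      public CountingFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) return false;
        count++; // count tokens as they stream past
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset(); // resets the wrapped input TokenStream
        count = 0;     // clear local state so this instance can be reused
      }
    }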

New users of Solr 1.4 will have omitTermFreqAndPositions enabled for non-text
indexed fields by default, which avoids indexing term frequency, positions, and
payloads, making the index smaller and faster. If you are upgrading from an
earlier Solr release and want to enable omitTermFreqAndPositions by default,
change the schema version from 1.1 to 1.2 in schema.xml. Remove any existing
index and restart Solr to ensure that omitTermFreqAndPositions completely takes
effect.

The default QParserPlugin used by the QueryComponent for parsing the "q" param
has been changed, to remove support for the deprecated use of ";" as a separator
between the query string and the sort options when no "sort" param was used.
Users who wish to continue using the semi-colon based method of specifying the
sort options should explicitly set the defType param to "lucenePlusSort" on all
requests. (The simplest way to do this is by specifying it as a default param
for your request handlers in solrconfig.xml, see the example solrconfig.xml for
sample syntax.)

If spellcheck.extendedResults=true, the response format for suggestions
has changed, see SOLR-1071.

Use of the "charset" option when configuring the following Analysis
Factories has been deprecated and will cause a warning to be logged.
In future versions of Solr attempting to use this option will cause an
error. See SOLR-1410 for more information.
 * GreekLowerCaseFilterFactory
 * RussianStemFilterFactory
 * RussianLowerCaseFilterFactory
 * RussianLetterTokenizerFactory

Versions of Major Components
----------------------------
Apache Lucene 2.9.1 (r832363 on 2.9 branch)
Apache Tika 0.4
Carrot2 3.1.0

Lucene Information
----------------

Since Solr is built on top of Lucene, many people add customizations to Solr
that are dependent on Lucene. Please see http://lucene.apache.org/java/2_9_0/,
especially http://lucene.apache.org/java/2_9_0/changes/Changes.html for more
information on the version of Lucene used in Solr.

Detailed Change List
----------------------

New Features
----------------------
 1. SOLR-560: Use SLF4J logging API rather than JDK logging. The packaged .war file is shipped with a JDK logging implementation, so logging configuration for the .war should be identical to solr 1.3. However, if you are using the .jar file, you can select which logging implementation to use by dropping a different binding. See: http://www.slf4j.org/ (ryan)
 2. SOLR-617: Allow configurable index deletion policy and provide a default implementation which allows deletion of commit points on various criteria such as number of commits, age of commit point and optimized status. See http://lucene.apache.org/java/2_3_2/api/org/apache/lucene/index/IndexDeletionPolicy.html (yonik, Noble Paul, Akshay Ukey via shalin)
 3. SOLR-658: Allow Solr to load index from arbitrary directory in dataDir (Noble Paul, Akshay Ukey via shalin)
 4. SOLR-793: Add 'commitWithin' argument to the update add command. This behaves similar to the global autoCommit maxTime argument except that it is set for each request. (ryan)
 5. SOLR-670: Add support for rollbacks in UpdateHandler. This allows user to rollback all changes since the last commit. (Noble Paul, koji via shalin)
 6. SOLR-813: Adding DoubleMetaphone Filter and Factory. Similar to the PhoneticFilter, but this uses DoubleMetaphone specific calls (including alternate encoding) (Todd Feak via ryan)
 7. SOLR-680: Add StatsComponent.
    This gets simple statistics on matched numeric fields, including: min, max, mean, median, stddev. (koji, ryan)
    7.1 SOLR-1380: Added support for multi-valued fields (Harish Agarwal via gsingers)
 8. SOLR-561: Added Replication implemented in Java as a request handler. Supports index replication as well as configuration replication and exposes detailed statistics and progress information on the Admin page. Works on all platforms. (Noble Paul, yonik, Akshay Ukey, shalin)
 9. SOLR-746: Added "omitHeader" request parameter to omit the header from the response. (Noble Paul via shalin)
10. SOLR-651: Added TermVectorComponent for serving up term vector information, plus IDF. See http://wiki.apache.org/solr/TermVectorComponent (gsingers, Vaijanath N. Rao, Noble Paul)
12. SOLR-795: SpellCheckComponent supports building indices on optimize if configured in solrconfig.xml (Jason Rennie, shalin)
13. SOLR-667: A LRU cache implementation based upon ConcurrentHashMap and other techniques to reduce contention and synchronization overhead, to utilize multiple CPU cores more effectively. (Fuad Efendi, Noble Paul, yonik via shalin)
14. SOLR-465: Add configurable DirectoryProvider so that alternate Directory implementations can be specified via solrconfig.xml. The default DirectoryProvider will use NIOFSDirectory for better concurrency on non Windows platforms. (Mark Miller, TJ Laurenzo via yonik)
15. SOLR-822: Add CharFilter so that characters can be filtered (e.g. character normalization) before Tokenizer/TokenFilters. (koji)
16. SOLR-829: Allow slaves to request compressed files from master during replication (Simon Collins, Noble Paul, Akshay Ukey via shalin)
17. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities. Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers) Added mincount and maxcount options (Khee Chin via gsingers)
18. SOLR-538: Add maxChars attribute for copyField function so that the length limit for destination can be specified. (Georgios Stamatis, Lars Kotthoff, Chris Harris via koji)
19. SOLR-284: Added support for extracting content from binary documents like MS Word and PDF using Apache Tika. See also contrib/extraction/CHANGES.txt (Eric Pugh, Chris Harris, yonik, gsingers)
20. SOLR-819: Added factories for Arabic support (gsingers)
21. SOLR-781: Distributed search ability to sort facet.field values lexicographically. facet.sort values "true" and "false" are also deprecated and replaced with "count" and "lex". (Lars Kotthoff via yonik)
22. SOLR-821: Add support for replication to copy conf file to slave with a different name. This allows replication of solrconfig.xml (Noble Paul, Akshay Ukey via shalin)
23. SOLR-911: Add support for multi-select faceting by allowing filters to be tagged and facet commands to exclude certain filters. This patch also added the ability to change the output key for facets in the response, and optimized distributed faceting refinement by lowering parsing overhead and by making requests and responses smaller.
24. SOLR-876: WordDelimiterFilter now supports a splitOnNumerics option, as well as a list of protected terms. (Dan Rosher via hossman)
25. SOLR-928: SolrDocument and SolrInputDocument now implement the Map interface. This should make plugging into other standard tools easier. (ryan)
26. SOLR-847: Enhance the snappull command in ReplicationHandler to accept masterUrl.
    (Noble Paul, Preetam Rao via shalin)
27. SOLR-540: Add support for globbing in field names to highlight. For example, hl.fl=*_text will highlight all fieldnames ending with _text. (Lars Kotthoff via yonik)
28. SOLR-906: Adding a StreamingUpdateSolrServer that writes update commands to an open HTTP connection. If you are using solrj for bulk update requests you should consider switching to this implementation. However, note that the error handling is not immediate as it is with the standard SolrServer. (ryan)
29. SOLR-865: Adding support for document updates in binary format and corresponding support in Solrj client. (Noble Paul via shalin)
30. SOLR-763: Add support for Lucene's PositionFilter (Mck SembWever via shalin)
31. SOLR-966: Enhance the map() function query to take in an optional default value (Noble Paul, shalin)
32. SOLR-820: Support replication on startup of master with new index. (Noble Paul, Akshay Ukey via shalin)
33. SOLR-943: Make it possible to specify dataDir in solr.xml and accept the dataDir as a request parameter for the CoreAdmin create command. (Noble Paul via shalin)
34. SOLR-850: Addition of timeouts for distributed searching. Configurable through 'shard-socket-timeout' and 'shard-connection-timeout' parameters in SearchHandler. (Patrick O'Leary via shalin)
35. SOLR-799: Add support for hash based exact/near duplicate document handling. (Mark Miller, yonik)
36. SOLR-1026: Add protected words support to SnowballPorterFilterFactory (ehatcher)
37. SOLR-739: Add support for OmitTf (Mark Miller via yonik)
38. SOLR-1046: Nested query support for the function query parser and lucene query parser (the latter existed as an undocumented feature in 1.3) (yonik)
39. SOLR-940: Add support for Lucene's Trie Range Queries by providing new FieldTypes in schema for int, float, long, double and date. Single-valued Trie based fields with a precisionStep will index multiple precisions and enable faster range queries. (Uwe Schindler, yonik, shalin)
40. SOLR-1038: Enhance CommonsHttpSolrServer to add docs in batch using an iterator API (Noble Paul via shalin)
41. SOLR-844: A SolrServer implementation to front-end multiple solr servers and provides load balancing and failover support (Noble Paul, Mark Miller, hossman via shalin)
42. SOLR-939: ValueSourceRangeFilter/Query - filter based on values in a FieldCache entry or on any arbitrary function of field values. (yonik)
43. SOLR-1095: Fixed performance problem in the StopFilterFactory and simplified code. Added tests as well. (gsingers)
44. SOLR-1096: Introduced httpConnTimeout and httpReadTimeout in replication slave configuration to avoid stalled replication. (Jeff Newburn, Noble Paul, shalin)
45. SOLR-1115: on and yes work as expected in solrconfig.xml. (koji)
46. SOLR-1099: A FieldAnalysisRequestHandler which provides the analysis functionality of the web admin page as a service. The AnalysisRequestHandler is renamed to DocumentAnalysisRequestHandler which is enhanced with query analysis and showMatch support. AnalysisRequestHandler is now deprecated. Support for both FieldAnalysisRequestHandler and DocumentAnalysisRequestHandler is also provided in the Solrj client. (Uri Boness, shalin)
47. SOLR-1106: Made CoreAdminHandler Actions pluggable so that additional actions may be plugged in or the existing ones can be overridden if needed. (Kay Kay, Noble Paul, shalin)
48. SOLR-1124: Add a top() function query that causes its argument to have its values derived from the top level IndexReader, even when invoked from a sub-reader. top() is implicitly used for the ord() and rord() functions. (yonik)
49. SOLR-1110: Support sorting on trie fields with Distributed Search. (Mark Miller, Uwe Schindler via shalin)
50. SOLR-1121: CoreAdminHandler should not need a core. This makes it possible to start a Solr server w/o a core. (noble)
51. SOLR-769: Added support for clustering in contrib/clustering. See http://wiki.apache.org/solr/ClusteringComponent for more info. (gsingers, Stanislaw Osinski)
52. SOLR-1175: Disable/enable replication on master side. Added two commands 'enableReplication' and 'disableReplication' (noble)
53. SOLR-1179: DocSets can now be used as Lucene Filters via DocSet.getTopFilter() (yonik)
54. SOLR-1116: Add a Binary FieldType (noble)
55. SOLR-1051: Support the merge of multiple indexes as a CoreAdmin and an update command (Ning Li via shalin)
56. SOLR-1152: Snapshoot on ReplicationHandler should accept location as a request parameter (shalin)
57. SOLR-1204: Enhance SpellingQueryConverter to handle UTF-8 instead of ASCII only. Use the NMTOKEN syntax for matching field names. (Michael Ludwig, shalin)
58. SOLR-1189: Support providing username and password for basic HTTP authentication in Java replication (Matthew Gregg, shalin)
59. SOLR-243: Add configurable IndexReaderFactory so that alternate IndexReader implementations can be specified via solrconfig.xml. Note that using a custom IndexReader may be incompatible with ReplicationHandler (see comments in SOLR-1366). This should be treated as an experimental feature. (Andrzej Bialecki, hossman, Mark Miller, John Wang)
60. SOLR-1214: Differentiate between solr home and instanceDir. Deprecates the method SolrResourceLoader#locateInstanceDir(), which is renamed to locateSolrHome. (noble)
61. SOLR-1216: Disambiguate the replication command names: 'snappull' becomes 'fetchindex', 'abortsnappull' becomes 'abortfetch'. (noble)
62. SOLR-1145: Add capability to specify an infoStream log file for the underlying Lucene IndexWriter in solrconfig.xml. This is an advanced debug log file that can be used to aid developers in fixing IndexWriter bugs. See the commented out example in the example solrconfig.xml under the indexDefaults section. (Chris Harris, Mark Miller)
63. SOLR-1256: Show the output of CharFilters in analysis.jsp. (koji)
64. SOLR-1266: Added stemEnglishPossessive option (default=true) to WordDelimiterFilter that allows disabling of english possessive stemming (removal of trailing 's from tokens) (Robert Muir via yonik)
65. SOLR-1237: firstSearcher and newSearcher can now be identified via the CommonParams.EVENT (evt) parameter in a request. This allows a RequestHandler or SearchComponent to know when a newSearcher or firstSearcher event happened. QuerySenderListener is the only implementation in Solr that implements this, but outside implementations may wish to. See the AbstractSolrEventListener for a helper method. (gsingers)
66. SOLR-1343: Added HTMLStripCharFilter and marked HTMLStripReader, HTMLStripWhitespaceTokenizerFactory and HTMLStripStandardTokenizerFactory deprecated. To strip HTML tags, HTMLStripCharFilter can be used with an arbitrary Tokenizer. (koji)
67. SOLR-1275: Add expungeDeletes to DirectUpdateHandler2 (noble)
68. SOLR-1372: Enhance FieldAnalysisRequestHandler to accept field value from content stream (ehatcher)
69. SOLR-1370: Show the output of CharFilters in FieldAnalysisRequestHandler (koji)
70. SOLR-1373: Add Filter query to admin/form.jsp (Jason Rutherglen via hossman)
71. SOLR-1368: Add ms() function query for getting milliseconds from dates and for high precision date subtraction, add sub() for subtracting other arguments. (yonik)
72. SOLR-1156: Sort TermsComponent results by frequency (Matt Weber via yonik)
73. SOLR-1335: load core properties from a properties file (noble)
74. SOLR-1385: Add an 'enable' attribute to all plugins (noble)
75. SOLR-1414: implicit core properties are not set for single core (noble)
76. SOLR-659: Adds shards.start and shards.rows to distributed search to allow more efficient bulk queries (those that retrieve many or all documents). (Brian Whitman via yonik)
77. SOLR-1321: Add better support for efficient wildcard handling (Andrzej Bialecki, Robert Muir, gsingers)
78. SOLR-1326: New interface PluginInfoInitialized for all types of plugin (noble)
79. SOLR-1447: Simple property injection. & syntaxes are now deprecated (Jason Rutherglen, noble)
80. SOLR-908: CommonGramsFilterFactory/CommonGramsQueryFilterFactory for speeding up phrase queries containing common words by indexing n-grams and using them at query time. (Tom Burton-West, Jason Rutherglen via yonik)
81. SOLR-1292: Add FieldCache introspection to stats.jsp and JMX Monitoring via a new SolrFieldCacheMBean. (hossman)
82. SOLR-1167: Solr Config now supports XInclude for XML engines that can support it. (Bryan Talbot via gsingers)
83. SOLR-1478: Enable sort by Lucene docid. (ehatcher)
84. SOLR-1449: Add <lib/> elements to solrconfig.xml for specifying additional classpath directories and regular expressions. (hossman via yonik)


Optimizations
----------------------
 1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the index that haven't changed. (Mark Miller via yonik)
 2. SOLR-808: Write string keys in Maps as extern strings in the javabin format. (Noble Paul via shalin)
 3. SOLR-475: New faceting method with better performance and smaller memory usage for multi-valued fields with many unique values but relatively few values per document. Controllable via the facet.method parameter - "fc" is the new default method and "enum" is the original method. (yonik)
 4. SOLR-970: Use an ArrayList in SolrPluginUtils.parseQueryStrings since we know exactly how long the List will be in advance. (Kay Kay via hossman)
 5. SOLR-1002: Change SolrIndexSearcher to use insertWithOverflow with reusable priority queue entries to reduce the amount of generated garbage during searching. (Mark Miller via yonik)
 6. SOLR-971: Replace StringBuffer with StringBuilder for instances that do not require thread-safety. (Kay Kay via shalin)
 7. SOLR-921: SolrResourceLoader must cache short class name vs fully qualified classname (Noble Paul, hossman via shalin)
 8. SOLR-973: CommonsHttpSolrServer writes the xml directly to the server. (Noble Paul via shalin)
 9. SOLR-1108: Remove un-needed synchronization in SolrCore constructor. (Noble Paul via shalin)
10. SOLR-1166: Speed up docset/filter generation by avoiding top-level score() call and iterating over leaf readers with TermDocs. (yonik)
11. SOLR-1169: SortedIntDocSet - a new small set implementation that saves memory over HashDocSet, is faster to construct, is ordered for easier implementation of skipTo, and is faster in the general case. (yonik)
12. SOLR-1165: Use Lucene Filters and pass them down to the Lucene search methods to filter earlier and improve performance. (yonik)
13. SOLR-1111: Use per-segment sorting to share fieldcache elements across unchanged segments. This saves memory and reduces commit times for incremental updates to the index. (yonik)
14. SOLR-1188: Minor efficiency improvement in TermVectorComponent related to ignoring positions or offsets (gsingers)
15. SOLR-1150: Load Documents for Highlighting one at a time rather than all at once to avoid OOM with many large Documents. (Siddharth Gargate via Mark Miller)
16. SOLR-1353: Implement and use reusable token streams for analysis. (Robert Muir, yonik)
17. SOLR-1296: Enables setting IndexReader's termInfosIndexDivisor via a new attribute to StandardIndexReaderFactory. Enables setting termIndexInterval to IndexWriter via SolrIndexConfig. (Jason Rutherglen, hossman, gsingers)

Bug Fixes
----------------------
 1. SOLR-774: Fixed logging level display (Sean Timm via Otis Gospodnetic)
 2. SOLR-771: CoreAdminHandler STATUS should display 'normalized' paths (koji, hossman, shalin)
 3. SOLR-532: WordDelimiterFilter now respects payloads and other attributes of the original Token by using Token.clone() (Tricia Williams, gsingers)
 4. SOLR-805: DisMax queries are not being cached in QueryResultCache (Todd Feak via koji)
 5. SOLR-751: WordDelimiterFilter didn't adjust the start offset of single tokens that started with delimiters, leading to incorrect highlighting. (Stefan Oestreicher via yonik)
 7. SOLR-843: SynonymFilterFactory cannot handle multiple synonym files correctly (koji)
 8. SOLR-840: BinaryResponseWriter does not handle incompatible data in fields (Noble Paul via shalin)
 9. SOLR-803: CoreAdminRequest.createCore fails because name parameter isn't set (Sean Colombo via ryan)
10. SOLR-869: Fix file descriptor leak in SolrResourceLoader#getLines (Mark Miller, shalin)
11. SOLR-872: Better error message for incorrect copyField destination (Noble Paul via shalin)
12. SOLR-879: Enable position increments in the query parser and fix the example schema to enable position increments for the stop filter in both the index and query analyzers to fix the bug with phrase queries with stopwords. (yonik)
13. SOLR-836: Add missing "a" to the example stopwords.txt (yonik)
14. SOLR-892: Fix serialization of booleans for PHPSerializedResponseWriter (yonik)
15. SOLR-898: Fix null pointer exception for the JSON response writer based formats when json.nl=arrarr with null keys. (yonik)
16. SOLR-901: FastOutputStream ignores write(byte[]) call. (Noble Paul via shalin)
17. SOLR-807: BinaryResponseWriter writes fieldType.toExternal if it is not a supported type, otherwise it writes fieldType.toObject. This fixes the bug with encoding/decoding UUIDField. (koji, Noble Paul, shalin)
18. SOLR-863: SolrCore.initIndex should close the directory it gets for clearing the lock and use the DirectoryFactory. (Mark Miller via shalin)
19. SOLR-802: Fix a potential null pointer error in the distributed FacetComponent (David Bowen via ryan)
20. SOLR-346: Use perl regex to improve accuracy of finding latest snapshot in snapinstaller (billa)
21. SOLR-830: Use perl regex to improve accuracy of finding latest snapshot in snappuller (billa)
22. SOLR-897: Fixed Argument list too long error when there are lots of snapshots/backups (Dan Rosher via billa)
23. SOLR-925: Fixed highlighting on fields with multiValued="true" and termOffsets="true" (koji)
24. SOLR-902: FastInputStream#read(byte b[], int off, int len) gives incorrect results when amount left to read is less than buffer size (Noble Paul via shalin)
25. SOLR-978: Old files are not removed from slaves after replication (Jaco, Noble Paul, shalin)
26. SOLR-883: Implicit properties are not set for Cores created through CoreAdmin (Noble Paul via shalin)
27. SOLR-991: Better error message when parsing solrconfig.xml fails due to malformed XML. Error message notes the name of the file being parsed. (Michael Henson via shalin)
28. SOLR-1008: Fix stats.jsp XML encoding for item entries with ampersands in their names. (ehatcher)
29. SOLR-976: deleteByQuery is ignored when deleteById is placed prior to deleteByQuery in a <delete>. Now both delete by id and delete by query can be specified at the same time as follows. (koji)

    <delete><id>05991</id><id>06000</id><query>office:Bridgewater</query><query>office:Osaka</query></delete>

30. SOLR-1016: HTTP 503 error changes to 500 in SolrCore (koji)
31. SOLR-1015: Incomplete information in replication admin page and http command response when server is both master and slave i.e. when server is a repeater (Akshay Ukey via shalin)
32. SOLR-1018: Slave is unable to replicate when server acts as repeater (as both master and slave) (Akshay Ukey, Noble Paul via shalin)
33. SOLR-1031: Fix XSS vulnerability in schema.jsp (Paul Lovvik via ehatcher)
34. SOLR-1064: registry.jsp incorrectly displaying info for last core initialized regardless of what the current core is. (hossman)
35. SOLR-1072: absolute paths used in sharedLib attribute were incorrectly treated as relative paths. (hossman)
36. SOLR-1104: Fix some rounding errors in LukeRequestHandler's histogram (hossman)
37. SOLR-1125: Use query analyzer rather than index analyzer for queryFieldType in QueryElevationComponent (koji)
38. SOLR-1126: Replicated files have incorrect timestamp (Jian Han Guo, Jeff Newburn, Noble Paul via shalin)
39. SOLR-1094: Incorrect value of correctlySpelled attribute in some cases (David Smiley, Mark Miller via shalin)
40. SOLR-965: Better error message when is not configured. (Mark Miller via hossman)
41. SOLR-1135: Java replication creates Snapshot in the directory where Solr was launched (Jianhan Guo via shalin)
42. SOLR-1138: Query Elevation Component now gracefully handles missing queries. (gsingers)
43. SOLR-929: LukeRequestHandler should return "dynamicBase" only if the field is dynamic. (Peter Wolanin, koji)
44. SOLR-1141: NullPointerException during snapshoot command in java based replication (Jian Han Guo, shalin)
45. SOLR-1078: Fixes to WordDelimiterFilter to avoid splitting or dropping international non-letter characters such as non spacing marks. (yonik)
46. SOLR-825, SOLR-1221: Enables highlighting for range/wildcard/fuzzy/prefix queries if using hl.usePhraseHighlighter=true and hl.highlightMultiTerm=true. Also make both options default to true. (Mark Miller, yonik)
47. SOLR-1174: Fix Logging admin form submit url for multicore. (Jacob Singh via shalin)
48. SOLR-1182: Fix bug in OrdFieldSource#equals which could cause a bug with OrdFieldSource caching on OrdFieldSource#hashcode collisions. (Mark Miller)
49. SOLR-1207: equals method should compare this and other of DocList in DocSetBase (koji)
50. SOLR-1242: Human readable JVM info from system handler does integer cutoff rounding, even when dealing with GB. Fixed to round to one decimal place. (Jay Hill, Mark Miller)
51. SOLR-1243: Admin RequestHandlers should not be cached over HTTP. (Mark Miller)
52. SOLR-1260: Fix implementations of set operations for DocList subclasses and fix a bug in HashDocSet construction when offset != 0. These bugs never manifested in normal Solr use and only potentially affect custom code. (yonik)
53. SOLR-1171: Fix LukeRequestHandler so it doesn't rely on SolrQueryParser and report incorrect stats when field names contain characters SolrQueryParser considers special. (hossman)
54. SOLR-1317: Fix CapitalizationFilterFactory to work when keep parameter is not specified. (ehatcher)
55. SOLR-1342: CapitalizationFilterFactory uses incorrect term length calculations. (Robert Muir via Mark Miller)
56. SOLR-1359: DoubleMetaphoneFilter didn't index original tokens if there was no alternative, and could incorrectly skip or reorder tokens. (yonik)
57. SOLR-1360: Prevent PhoneticFilter from producing duplicate tokens. (yonik)
58. SOLR-1371: LukeRequestHandler/schema.jsp errored if schema had no uniqueKey field. The new test for this also (hopefully) adds some future proofing against similar bugs in the future. As a side effect QueryElevationComponentTest was refactored, and a bug in that test was found. (hossman)
59. SOLR-914: General finalize() improvements. No finalizer delegates to the respective close/destroy method w/o first checking if it's already been closed/destroyed; if it hasn't, a SEVERE error is logged first. (noble, hossman)
60. SOLR-1362: WordDelimiterFilter had inconsistent behavior when setting the position increment of tokens following a token consisting of all delimiters, and could additionally lose big position increments. (Robert Muir, yonik)
61. SOLR-1091: Jetty's use of CESU-8 for code points outside the BMP resulted in invalid output from the serialized PHP writer. (yonik)
62. SOLR-1103: LukeRequestHandler (and schema.jsp) have been fixed to include the "1" (ie: 2**0) bucket in the term histogram data. (hossman)
63. SOLR-1398: Add offset corrections in PatternTokenizerFactory. (Anders Melchiorsen, koji)
64. SOLR-1400: Properly handle zero-length tokens in TrimFilter. This was not a bug in any released version. (Peter Wolanin, gsingers)
65. SOLR-1071: spellcheck.extendedResults returns an invalid JSON response when count > 1. To fix, the extendedResults format was changed. (Uri Boness, yonik)
66. SOLR-1381: Fixed improper handling of fields that have only term positions and not term offsets during Highlighting (Thorsten Fischer, gsingers)
67. SOLR-1427: Fixed registry.jsp issue with MBeans (gsingers)
68. SOLR-1468: SolrJ's XML response parsing threw an exception for null names, such as those produced when facet.missing=true (yonik)
69. SOLR-1471: Fixed issue with calculating missing values for facets in single valued cases in Stats Component. This is not correctly calculated for the multivalued case. (James Miller, gsingers)
70. SOLR-1481: Fixed omitHeader parameter for PHP ResponseWriter. (Jun Ohtani via billa)
71. SOLR-1448: Add weblogic.xml to solr webapp to enable correct operation in WebLogic. (Ilan Rabinovitch via yonik)
72. SOLR-1504: empty char mapping can cause ArrayIndexOutOfBoundsException in analysis.jsp and co. (koji)
73. SOLR-1394: HTMLStripCharFilter split tokens that contained entities and often calculated offsets incorrectly for entities. (Anders Melchiorsen via yonik)
74. SOLR-1517: Admin pages could stall waiting for localhost name resolution if reverse DNS wasn't configured; this was changed so the DNS resolution is attempted only once the first time an admin page is loaded. (hossman)
75. SOLR-1529: More than 8 deleteByQuery commands in a single request caused an error to be returned, although the deletes were still executed. (asmodean via yonik)

Other Changes
----------------------
 1. Upgraded to Lucene 2.4.0 (yonik)
 2. SOLR-805: Upgraded to Lucene 2.9-dev (r707499) (koji)
 3. DumpRequestHandler (/debug/dump): changed 'fieldName' to 'sourceInfo'. (ehatcher)
 4. SOLR-852: Refactored common code in CSVRequestHandler and XMLUpdateRequestHandler (gsingers, ehatcher)
 5. SOLR-871: Removed dependency on stax-utils.jar. If you are using solr.jar and running java 6, you can also remove woodstox and geronimo. (ryan)
 6. SOLR-465: Upgraded to Lucene 2.9-dev (r719351) (shalin)
 7. SOLR-889: Upgraded to commons-io-1.4.jar and commons-fileupload-1.2.1.jar (ryan)
 8. SOLR-875: Upgraded to Lucene 2.9-dev (r723985) and consolidated the BitSet implementations (Michael Busch, gsingers)
 9. SOLR-819: Upgraded to Lucene 2.9-dev (r724059) to get access to Arabic public constructors (gsingers)
10. SOLR-900: Moved solrj into /src/solrj. The contents of solr-common.jar are now included in the solr-solrj.jar. (ryan)
11. SOLR-924: Code cleanup: make all existing finalize() methods call super.finalize() in a finally block. All current instances extend Object, so this doesn't fix any bugs, but helps protect against future changes. (Kay Kay via hossman)
12. SOLR-885: NamedListCodec is renamed to JavaBinCodec and returns Object instead of NamedList. (Noble Paul, yonik via shalin)
13. SOLR-84: Use new Solr logo in admin (Michiel via koji)
14. SOLR-981: groupId for Woodstox dependency in maven solrj changed to org.codehaus.woodstox (Tim Taranov via shalin)
15. Upgraded to Lucene 2.9-dev r738218 (yonik)
16. SOLR-959: Refactored TestReplicationHandler to remove hardcoded port numbers (hossman, Akshay Ukey via shalin)
17. Upgraded to Lucene 2.9-dev r742220 (yonik)
18. SOLR-1022: Better "ignored" field in example schema.xml (Peter Wolanin via hossman)
19. SOLR-967: New type-safe constructor for NamedList (Kay Kay via hossman)
20. SOLR-1036: Change default QParser from "lucenePlusSort" to "lucene" to reduce confusion of semicolon splitting behavior when no sort param is specified (hossman)
21. Upgraded to Lucene 2.9-dev r752164 (shalin)
22. SOLR-1068: Use fsync on replicated index and configuration files (yonik, Noble Paul, shalin)
23. SOLR-952: Cleanup duplicated code in deprecated HighlightingUtils (hossman)
24. Upgraded to Lucene 2.9-dev r764281 (shalin)
25. SOLR-1079: Rename omitTf to omitTermFreqAndPositions (shalin)
26. SOLR-804: Added Lucene's misc contrib JAR (rev 764281). (gsingers)
27. Upgraded to Lucene 2.9-dev r768228 (shalin)
28. Upgraded to Lucene 2.9-dev r768336 (shalin)
29. SOLR-997: Wait for a longer time for slave to complete replication in TestReplicationHandler (Mark Miller via shalin)
30. SOLR-748: FacetComponent helper classes are made public as an experimental API.
    (Wojtek Piaseczny via shalin)
31. Upgraded to Lucene 2.9-dev 773862 (Mark Miller)
32. Upgraded to Lucene 2.9-dev r776177 (shalin)
33. SOLR-1149: Made QParserPlugin and related classes extendible as an experimental API. (Kaktu Chakarabati via shalin)
34. Upgraded to Lucene 2.9-dev r779312 (yonik)
35. SOLR-786: Refactor DisMaxQParser to allow overriding certain features of DisMaxQParser (Wojciech Biela via shalin)
36. SOLR-458: Add equals and hashCode methods to NamedList (Stefan Rinner, shalin)
37. SOLR-1184: Add option in solrconfig to open a new IndexReader rather than using reopen. Done mainly as a fail-safe in the case that a user runs into a reopen bug/issue. (Mark Miller)
38. SOLR-1215: Use double quotes to enclose attributes in solr.xml (noble)
39. SOLR-1151: add dynamic copy field and maxChars example to example schema.xml. (Peter Wolanin, Mark Miller)
40. SOLR-1233: remove /select?qt=/whatever restriction on /-prefixed request handlers. (ehatcher)
41. SOLR-1257: logging.jsp has been removed and now passes through to the hierarchical log level tool added in Solr 1.3. Users still hitting "/admin/logging.jsp" should switch to "/admin/logging". (hossman)
42. Upgraded to Lucene 2.9-dev r794238. Other changes include:
    LUCENE-1614 - Use Lucene's DocIdSetIterator.NO_MORE_DOCS as the sentinel value.
    LUCENE-1630 - Add acceptsDocsOutOfOrder method to Collector implementations.
    LUCENE-1673, LUCENE-1701 - Trie has moved to Lucene core and renamed to NumericRangeQuery.
    LUCENE-1662, LUCENE-1687 - Replace usage of ExtendedFieldCache by FieldCache.
    (shalin)
42. SOLR-1241: Solr's CharFilter has been moved to Lucene. Remove CharFilter and related classes from Solr and use Lucene's corresponding code (koji via shalin)
43. SOLR-1261: Lucene trunk renamed RangeQuery & Co to TermRangeQuery (Uwe Schindler via shalin)
44. Upgraded to Lucene 2.9-dev r801856 (Mark Miller)
45. SOLR-1276: Added StatsComponentTest (Rafał Kuć, gsingers)
46. SOLR-1377: The TokenizerFactory API has changed to explicitly return a Tokenizer rather than a TokenStream (that may or may not be a Tokenizer). This change is required to take advantage of the Token reuse improvements in lucene 2.9. (ryan)
47. SOLR-1410: Log a warning if the deprecated charset option is used on GreekLowerCaseFilterFactory, RussianStemFilterFactory, RussianLowerCaseFilterFactory or RussianLetterTokenizerFactory. (Robert Muir via hossman)
48. SOLR-1423: Due to LUCENE-1906, Solr's tokenizer should use Tokenizer.correctOffset() instead of CharStream.correctOffset(). (Uwe Schindler via koji)
49. SOLR-1319, SOLR-1345: Upgrade Solr Highlighter classes to new Lucene Highlighter API. This upgrade has resulted in a back compat break in the DefaultSolrHighlighter class - getQueryScorer is no longer protected. If you happened to be overriding that method in custom code, override getHighlighter instead. Also, HighlightingUtils#getQueryScorer has been removed as it was deprecated and backcompat has been broken with it anyway. (Mark Miller)

Build
----------------------
 1. SOLR-776: Added in ability to sign artifacts via Ant for releases (gsingers)
 2. SOLR-854: Added run-example target (Mark Miller via ehatcher)
 3. SOLR-1054: Fix dist-src target for DataImportHandler (Ryuuichi Kumai via shalin)
 4. SOLR-1219: Added proxy.setup target (koji)
 5. SOLR-1386: In build.xml, use longfile="gnu" in tar task to avoid warnings about long file names (Mark Miller via shalin)
 6. SOLR-1441: Make it possible to run all tests in a package (shalin)

Documentation
----------------------
 1. SOLR-789: The javadoc of RandomSortField is not readable (Nicolas Lalevée via koji)
 2. SOLR-962: Note about null handling in ModifiableSolrParams.add javadoc (Kay Kay via hossman)
 3. SOLR-1409: Added Solr Powered By Logos

================== Release 1.3.0 20080915 ==================

Upgrading from Solr 1.2
-----------------------
IMPORTANT UPGRADE NOTE: In a master/slave configuration, all searchers/slaves
should be upgraded before the master! If the master were to be updated
first, the older searchers would not be able to read the new index format.

The Porter snowball based stemmers in Lucene were updated (LUCENE-1142),
and are not guaranteed to be backward compatible at the index level
(the stem of certain words may have changed). Re-indexing is recommended.

Older Apache Solr installations can be upgraded by replacing
the relevant war file with the new version. No changes to configuration
files should be needed.

This version of Solr contains a new version of Lucene implementing
an updated index format. This version of Solr/Lucene can still read
and update indexes in the older formats, and will convert them to the new
format on the first index change. Be sure to backup your index before
upgrading in case you need to downgrade.

Solr now recognizes HTTP Request headers related to HTTP Caching (see
RFC 2616 sec13) and will by default respond with "304 Not Modified"
when appropriate. This should only affect users who access Solr via
an HTTP Cache, or via a Web-browser that has an internal cache, but if
you wish to suppress this behavior an '<httpCaching never304="true"/>'
option can be added to your solrconfig.xml. See the wiki (or the
example solrconfig.xml) for more details...
  http://wiki.apache.org/solr/SolrConfigXml#HTTPCaching

In Solr 1.2, DateField did not enforce the canonical representation of
the ISO 8601 format when parsing incoming data, and did not generate
the canonical format when generating dates from "Date Math" strings
(particularly as it pertains to milliseconds ending in trailing zeros)
-- As a result equivalent dates could not always be compared properly.
This problem is corrected in Solr 1.3, but DateField users that might
have been affected by indexing inconsistent formats of equivalent
dates (ie: 1995-12-31T23:59:59Z vs 1995-12-31T23:59:59.000Z) may want
to consider reindexing to correct these inconsistencies. Users who
depend on some of the "broken" behavior of DateField in Solr 1.2
(specifically: accepting any input that ends in a 'Z') should consider
using the LegacyDateField class as a possible alternative. Users that
desire 100% backwards compatibility should consider using the Solr 1.2
version of DateField.

Due to some changes in the lifecycle of TokenFilterFactories, users of
Solr 1.2 who have written Java code which constructs new instances of
StopFilterFactory, SynonymFilterFactory, or EnglishPorterFilterFactory
will need to modify their code by adding a line like the following
prior to using the factory object...
  factory.inform(SolrCore.getSolrCore().getSolrConfig().getResourceLoader());
These lifecycle changes do not affect people who use Solr "out of the
box" or who have developed their own TokenFilterFactory plugins. More
info can be found in SOLR-594.
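
In code, the amended lifecycle looks roughly like this (a minimal sketch assuming the Solr 1.3 APIs named above; the args values and the wrapper class are hypothetical illustrations):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.solr.analysis.StopFilterFactory;
    import org.apache.solr.core.SolrCore;

    public class StopFactoryLifecycle {
      public static TokenStream wrap(TokenStream input) {
        StopFilterFactory factory = new StopFilterFactory();
        Map<String, String> args = new HashMap<String, String>();
        args.put("words", "stopwords.txt"); // hypothetical stopword file
        args.put("ignoreCase", "true");
        factory.init(args); // same init step as in Solr 1.2
        // New in 1.3: hand the factory a ResourceLoader before first use
        factory.inform(SolrCore.getSolrCore().getSolrConfig().getResourceLoader());
        return factory.create(input);
      }
    }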

The python client that used to ship with Solr is no longer included in
the distribution (see client/python/README.txt).

Detailed Change List
--------------------

New Features
 1. SOLR-69: Adding MoreLikeThisHandler to search for similar documents using lucene contrib/queries MoreLikeThis. MoreLikeThis is also available from the StandardRequestHandler using ?mlt=true. (bdelacretaz, ryan)
 2. SOLR-253: Adding KeepWordFilter and KeepWordFilterFactory. A TokenFilter that keeps tokens with text in the registered keeplist. This behaves like the inverse of StopFilter. (ryan)
 3. SOLR-257: WordDelimiterFilter has a new parameter splitOnCaseChange, which can be set to 0 to disable splitting "PowerShot" => "Power" "Shot". (klaas)
 4. SOLR-193: Adding SolrDocument and SolrInputDocument to represent documents outside of the lucene Document infrastructure. This class will be used by clients and for processing documents. (ryan)
 5. SOLR-244: Added ModifiableSolrParams - a SolrParams implementation that helps you change values after initialization. (ryan)
 6. SOLR-20: Added a java client interface with two implementations. One implementation uses commons httpclient to connect to solr via HTTP. The other connects to solr directly. Check client/java/solrj. This addition also includes tests that start jetty and test a connection using the full HTTP request cycle. (Darren Erik Vengroff, Will Johnson, ryan)
 7. SOLR-133: Added StaxUpdateRequestHandler that uses StAX for XML parsing. This implementation has much better error checking and lets you configure a custom UpdateRequestProcessor that can selectively process update requests depending on the request attributes. This class will likely replace XmlUpdateRequestHandler. (Thorsten Scherler, ryan)
 8. SOLR-264: Added RandomSortField, a utility field with a random sort order. The seed is based on a hash of the field name, so a dynamic field of this type is useful for generating different random sequences. This field type should only be used for sorting or as a value source in a FunctionQuery (ryan, hossman, yonik)
 9. SOLR-266: Adding show=schema to LukeRequestHandler to show the parsed schema fields and field types. (ryan)
10. SOLR-133: The UpdateRequestHandler now accepts multiple delete options within a single request. For example, sending: <delete><id>1</id><id>2</id></delete> will delete both 1 and 2. (ryan)
11. SOLR-269: Added UpdateRequestProcessor plugin framework. This provides a reasonable place to process documents after they are parsed and before they are committed to the index. This is a good place for custom document manipulation or document based authorization. (yonik, ryan)
12. SOLR-260: Converting to a standard PluginLoader framework. This reworks RequestHandlers, FieldTypes, and QueryResponseWriters to share the same base code for loading and initializing plugins. This adds a new configuration option to define the default RequestHandler and QueryResponseWriter in XML using default="true". (ryan)
13. SOLR-225: Enable pluggable highlighting classes. Allow configurable highlighting formatters and Fragmenters. (ryan)
14. SOLR-273/376/452/516: Added hl.maxAnalyzedChars highlighting parameter, defaulting to 50k, hl.alternateField, which allows the specification of a backup field to use as summary if no keywords are matched, and hl.mergeContiguous, which combines fragments if they are adjacent in the source document.
    (klaas, Grant Ingersoll, Koji Sekiguchi via klaas)
15. SOLR-291: Control maximum number of documents to cache for any entry in the queryResultCache via queryResultMaxDocsCached solrconfig.xml entry. (Koji Sekiguchi via yonik)
16. SOLR-240: New configuration setting in <indexDefaults> and <mainIndex> blocks supports all Lucene builtin LockFactories. 'single' is recommended setting, but 'simple' is default for total backwards compatibility. (Will Johnson via hossman)
17. SOLR-248: Added CapitalizationFilterFactory that creates tokens with normalized capitalization. This filter is useful for facet display, but will not work with a prefix query. (ryan)
    SOLR-468: Change to the semantics to keep the original token, not the token in the Map. Also switched to use Lucene's new reusable token capabilities. (gsingers)
18. SOLR-307: Added NGramFilterFactory and EdgeNGramFilterFactory. (Thomas Peuss via Otis Gospodnetic)
19. SOLR-305: analysis.jsp can be given a fieldtype instead of a field name. (hossman)
20. SOLR-102: Added RegexFragmenter, which splits text for highlighting based on a given pattern. (klaas)
21. SOLR-258: Date Faceting added to SimpleFacets. Facet counts computed for ranges of size facet.date.gap (a DateMath expression) between facet.date.start and facet.date.end. (hossman)
22. SOLR-196: A PHP serialized "phps" response writer that returns a serialized array that can be used with the PHP function unserialize, and a PHP response writer "php" that may be used by eval. (Nick Jenkin, Paul Borgermans, Pieter Berkel via yonik)
23. SOLR-308: A new UUIDField class which accepts UUID string values, as well as the special value of "NEW" which triggers generation of a new random UUID. (Thomas Peuss via hossman)
24. SOLR-349: New FunctionQuery functions: sum, product, div, pow, log, sqrt, abs, scale, map. Constants may now be used as a value source. (yonik)
25. SOLR-359: Add field type className to Luke response, and enabled access to the detailed field information from the solrj client API. (Grant Ingersoll via ehatcher)
26. SOLR-334: Pluggable query parsers. Allows specification of query type and arguments as a prefix on a query string. (yonik)
27. SOLR-351: External Value Source. An external file may be used to specify the values of a field, currently usable as a ValueSource in a FunctionQuery. (yonik)
28. SOLR-395: Many new features for the spell checker implementation, including an extended response mode with much richer output, multi-word spell checking, and a bevy of new and renamed options (see the wiki). (Mike Krimerman, Scott Taber via klaas).
29. SOLR-408: Added PingRequestHandler and deprecated SolrCore.getPingQueryRequest(). Ping requests should be configured using standard RequestHandler syntax in solrconfig.xml rather than using the <pingQuery> syntax. (Karsten Sperling via ryan)
30. SOLR-281: Added a 'Search Component' interface and converted StandardRequestHandler and DisMaxRequestHandler to use this framework. (Sharad Agarwal, Henri Biestro, yonik, ryan)
31. SOLR-176: Add detailed timing data to query response output. The SearchHandler interface now returns how long each section takes. (klaas)
32. SOLR-414: Plugin initialization now supports SolrCore and ResourceLoader "Aware" plugins. Plugins that implement SolrCoreAware or ResourceLoaderAware are informed about the SolrCore/ResourceLoader. (Henri Biestro, ryan)
33. SOLR-350: Support multiple SolrCores running in the same solr instance and allows runtime management for any running SolrCore. If a solr.xml file exists in solr.home, this file is used to instantiate multiple cores and enables runtime core manipulation. For more information see: http://wiki.apache.org/solr/CoreAdmin (Henri Biestro, ryan)
34. SOLR-447: Added a single request handler that will automatically register all standard admin request handlers. This replaces the need to register (and maintain) the set of admin request handlers. Assuming solrconfig.xml includes:
      <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
    This will register: Luke/SystemInfo/PluginInfo/ThreadDump/PropertiesRequestHandler. (ryan)
35. SOLR-142: Added RawResponseWriter and ShowFileRequestHandler. This returns config files directly. If AdminHandlers are configured, this will be added automatically. The jsp files /admin/get-file.jsp and /admin/raw-schema.jsp have been deprecated. The deprecated <gettableFiles> will be automatically registered with a ShowFileRequestHandler instance for backwards compatibility. (ryan)
36. SOLR-446: TextResponseWriter can write SolrDocuments and SolrDocumentLists the same way it writes Document and DocList. (yonik, ryan)
37. SOLR-418: Adding a query elevation component. This is an optional component to elevate some documents to the top positions (or exclude them) for a given query. (ryan)
38. SOLR-478: Added ability to get back unique key information from the LukeRequestHandler. (gsingers)
39. SOLR-127: HTTP Caching awareness. Solr now recognizes HTTP Request headers related to HTTP Caching (see RFC 2616 sec13) and will respond with "304 Not Modified" when appropriate. New options have been added to solrconfig.xml to influence this behavior. (Thomas Peuss via hossman)
40. SOLR-303: Distributed Search over HTTP. Specification of shards argument causes Solr to query those shards and merge the results into a single response. Querying, field faceting (sorted only), query faceting, highlighting, and debug information are supported in distributed mode. (Sharad Agarwal, Patrick O'Leary, Sabyasachi Dalal, Stu Hood, Jayson Minard, Lars Kotthoff, ryan, yonik)
41. SOLR-356: Pluggable functions (value sources) that allow registration of new functions via solrconfig.xml (Doug Daniels via yonik)
42. SOLR-494: Added cool admin Ajaxed schema explorer. (Greg Ludington via ehatcher)
43. SOLR-497: Added date faceting to the QueryResponse in SolrJ and QueryResponseTest (Shalin Shekhar Mangar via gsingers)
44. SOLR-486: Binary response format, faster and smaller than XML and JSON response formats (use wt=javabin). BinaryResponseParser for utilizing the binary format via SolrJ and is now the default. (Noble Paul, yonik)
45. SOLR-521: StopFilterFactory support for "enablePositionIncrements" (Walter Ferrara via hossman)
46. SOLR-557: Added SolrCore.getSearchComponents() to return an unmodifiable Map. (gsingers)
47. SOLR-516: Added hl.maxAlternateFieldLength parameter, to set max length for hl.alternateField (Koji Sekiguchi via klaas)
48. SOLR-319: Changed SynonymFilterFactory to "tokenize" synonyms file. To use a tokenizer, specify "tokenizerFactory" attribute in <filter>. (koji)
49. SOLR-515: Added SimilarityFactory capability to schema.xml, making config file parameters usable in the construction of the global Lucene Similarity implementation. (ehatcher)
50. SOLR-536: Add a DocumentObjectBinder to solrj that converts Objects to and from SolrDocuments. (Noble Paul via ryan)
51. SOLR-595: Add support for Field level boosting in the MoreLikeThis Handler. (Tom Morton, gsingers)
52. SOLR-572: Added SpellCheckComponent and org.apache.solr.spelling package to support more spell checking functionality. Also includes ability to add your own SolrSpellChecker implementation that plugs in. See http://wiki.apache.org/solr/SpellCheckComponent for more details (Shalin Shekhar Mangar, Bojan Smid, gsingers)
53. SOLR-679: Added accessor methods to Lucene based spell checkers (gsingers)
54. SOLR-423: Added Request Handler close hook notification so that RequestHandlers can be notified when a core is closing. (gsingers, ryan)
55. SOLR-603: Added ability to partially optimize. (gsingers)
56. SOLR-483: Add byte/short sorting support (gsingers)
57. SOLR-14: Add preserveOriginal flag to WordDelimiterFilter (Geoffrey Young, Trey Hyde, Ankur Madnani, yonik)
58. SOLR-502: Add search timeout support. (Sean Timm via yonik)
59. SOLR-605: Add the ability to register callbacks programmatically (ryan, Noble Paul)
60. SOLR-610: hl.maxAnalyzedChars can be -1 to highlight everything (Lars Kotthoff via klaas)
61. SOLR-522: Make analysis.jsp show payloads. (Tricia Williams via yonik)
62. SOLR-611: Expose sort_values returned by QueryComponent in SolrJ's QueryResponse (Dan Rosher via shalin)
63. SOLR-256: Support exposing Solr statistics through JMX (Sharad Agrawal, shalin)
64. SOLR-666: Expose warmup time in statistics for SolrIndexSearcher and LRUCache (shalin)
65. SOLR-663: Allow multiple files for stopwords, keepwords, protwords and synonyms (Otis Gospodnetic, shalin)
66. SOLR-469: Added DataImportHandler as a contrib project which makes indexing data from Databases, XML files and HTTP data sources into Solr quick and easy. Includes API and implementations for supporting multiple data sources, processors and transformers for importing data. Supports full data imports as well as incremental (delta) indexing. See http://wiki.apache.org/solr/DataImportHandler for more details. (Noble Paul, shalin)
67. SOLR-622: SpellCheckComponent supports auto-loading indices on startup and optionally, (re)builds indices on newSearcher event, if configured in solrconfig.xml (shalin)
68. SOLR-554: Hierarchical JDK log level selector for SOLR Admin replaces logging.jsp (Sean Timm via shalin)
69. SOLR-506: Emitting HTTP Cache headers can be enabled or disabled through configuration on a per-handler basis (shalin)
70. SOLR-716: Added support for properties in configuration files. Properties can be specified in solr.xml and can be used in solrconfig.xml and schema.xml (Henri Biestro, hossman, ryan, shalin)
71. SOLR-1129: Support binding dynamic fields to beans in SolrJ (Avlesh Singh, noble)
72. SOLR-920: Cache and reuse IndexSchema. A new attribute added in solr.xml called 'shareSchema'. (noble)

Changes in runtime behavior
 1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This removes the maxBufferedDeletes parameter added by SOLR-310 as Lucene now manages the deletes. This provides slightly better indexing performance and makes overwrites atomic, eliminating the possibility of a crash causing duplicates. (yonik)
- -Changes in runtime behavior - 1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This - removes the maxBufferedDeletes parameter added by SOLR-310 as Lucene - now manages the deletes. This provides slightly better indexing - performance and makes overwrites atomic, eliminating the possibility of - a crash causing duplicates. (yonik) - - 2. SOLR-689 / SOLR-695: If you have used "MultiCore" functionality in an unreleased - version of 1.3-dev, many classes and configs have been renamed for the official - 1.3 release. Specifically, solr.xml has replaced multicore.xml, and uses a slightly - different syntax. The solrj classes: MultiCore{Request/Response/Params} have been - renamed: CoreAdmin{Request/Response/Params} (hossman, ryan, Henri Biestro) - - 3. SOLR-647: reference count the SolrCore uses to prevent a premature - close while a core is still in use. (Henri Biestro, Noble Paul, yonik) - - 4. SOLR-737: SolrQueryParser now uses a ConstantScoreQuery for wildcard - queries that prevent an exception from being thrown when the number - of matching terms exceeds the BooleanQuery clause limit. (yonik) - -Optimizations - 1. SOLR-276: improve JSON writer speed. (yonik) - - 2. SOLR-310: bound and reduce memory usage by providing the <maxBufferedDeletes> parameter, - which flushes deletes without forcing the user to use <commit/> for this purpose. - (klaas) - - 3. SOLR-348: short-circuit faceting if less than mincount docs match. (yonik) - - 4. SOLR-354: Optimize removing all documents. Now when a delete by query - of *:* is issued, the current index is removed. (yonik) (see the sketch after this list) - - 5. SOLR-377: Speed up response writers. (yonik) - - 6. SOLR-342: Added support into the SolrIndexWriter for using several new features of the new - Lucene IndexWriter, including: setRAMBufferSizeMB(), setMergePolicy(), setMergeScheduler(). - Also, added support to specify Lucene's autoCommit functionality (not to be confused with Solr's - similarly named autoCommit functionality) via the <luceneAutoCommit> config item. See the test - and example solrconfig.xml section for usage. Performance during indexing should - be significantly increased by moving up to Lucene 2.3 due to Lucene's new indexing capabilities. - Furthermore, the setRAMBufferSizeMB makes it more logical to decide on tuning factors related to - indexing. For best performance, leave the mergePolicy and mergeScheduler as the defaults and set - ramBufferSizeMB instead of maxBufferedDocs. The best value for this depends on the types of - documents in use. 32 should be a good starting point, but reports have shown up to 48 MB provides - good results. Note, it is acceptable to set both ramBufferSizeMB and maxBufferedDocs, and Lucene - will flush based on whichever limit is reached first. (gsingers) - - 7. SOLR-330: Converted TokenStreams to use Lucene's new char array based - capabilities. (gsingers) - - 8. SOLR-624: Only take snapshots if there are differences to the index (Richard Trey Hyde via gsingers) - - 9. SOLR-587: Delete by Query performance greatly improved by using - new underlying Lucene IndexWriter implementation. (yonik) - -10. SOLR-730: Use read-only IndexReaders that don't synchronize - isDeleted(). This will speed up function queries and *:* queries - as well as improve their scalability on multi-CPU systems. - (Mark Miller via yonik)
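The SOLR-354 optimization above means a delete-by-query of *:* now drops the whole index rather than marking each document deleted. A minimal SolrJ sketch, purely for illustration (the URL is an assumption):

   import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;

   public class WipeIndex {
     public static void main(String[] args) throws Exception {
       CommonsHttpSolrServer server =
           new CommonsHttpSolrServer("http://localhost:8983/solr");
       // A *:* delete is short-circuited server-side: the current
       // index is removed instead of deleting documents one by one.
       server.deleteByQuery("*:*");
       server.commit();
     }
   }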
- -Bug Fixes - 1. Make TextField respect sortMissingFirst and sortMissingLast fields. - (J.J. Larrea via yonik) - - 2. autoCommit/maxDocs was not working properly when large autoCommit/maxTime - was specified (klaas) - - 3. SOLR-283: autoCommit was not working after delete. (ryan) - - 4. SOLR-286: ContentStreamBase was not using default encoding for getBytes() - (Toru Matsuzawa via ryan) - - 5. SOLR-292: Fix MoreLikeThis facet counting. (Pieter Berkel via ryan) - - 6. SOLR-297: Fix bug in RequiredSolrParams where requiring a field - specific param would fail if a general default value had been supplied. - (hossman) - - 7. SOLR-331: Fix WordDelimiterFilter handling of offsets for synonyms or - other injected tokens that can break highlighting. (yonik) - - 8. SOLR-282: Snapshooter does not work on Solaris and OS X since the cp command - there does not have the -l option. Also updated commit/optimize related - scripts to handle both old and new response format. (bill) - - 9. SOLR-294: Logging of elapsed time broken on Solaris because the date command - there does not support the %s output format. (bill) - -10. SOLR-136: Snappuller - "date -d" and locales don't mix. (Jürgen Hermann via bill) - -11. SOLR-333: Changed distributiondump.jsp to use Solr HOME instead of CWD to set path. - -12. SOLR-393: Removed duplicate contentType from raw-schema.jsp. (bill) - -13. SOLR-413: Requesting a large number of documents to be returned (limit) - can result in an out-of-memory exception, even for a small index. (yonik) - -14. The CSV loader incorrectly threw an exception when given - header=true (the default). (ryan, yonik) - -15. SOLR-449: the python and ruby response writers are now able to correctly - output NaN and Infinity in their respective languages. (klaas) - -16. SOLR-42: HTMLStripReader tokenizers now preserve correct source - offsets for highlighting. (Grant Ingersoll via yonik) - -17. SOLR-481: Handle UnknownHostException in _info.jsp (gsingers) - -18. SOLR-324: Add proper support for Long and Doubles in sorting, etc. (gsingers) - -19. SOLR-496: Cache-Control max-age changed to Long so Expires - calculation won't cause overflow. (Thomas Peuss via hossman) - -20. SOLR-535: Fixed typo (Tokenzied -> Tokenized) in schema.jsp (Thomas Peuss via billa) - -21. SOLR-529: Better error messages from SolrQueryParser when field isn't - specified and there is no defaultSearchField in schema.xml - (Lars Kotthoff via hossman) - -22. SOLR-530: Better error messages/warnings when parsing schema.xml: - field using bogus fieldtype and multiple copyFields to a non-multiValue - field. (Shalin Shekhar Mangar via hossman) - -23. SOLR-528: Better error message when defaultSearchField is bogus or not - indexed. (Lars Kotthoff via hossman) - -24. SOLR-533: Fixed tests so they don't use hardcoded port numbers. - (hossman) - -25. SOLR-400: SolrExceptionTest should now handle using OpenDNS as a DNS provider (gsingers) - -26. SOLR-541: Legacy XML update support (provided by SolrUpdateServlet - when no RequestHandler is mapped to "/update") now logs error correctly. - (hossman) - -27. SOLR-267: Changed logging to report number of hits, and also provide a mechanism to add log - messages to be output by the SolrCore via a NamedList toLog member variable. - (Will Johnson, yseeley, gsingers) - - SOLR-267: Removed adding values to the HTTP headers in SolrDispatchFilter (gsingers) - -28. SOLR-509: Moved firstSearcher event notification to the end of the SolrCore constructor - (Koji Sekiguchi via gsingers) - -29. SOLR-470, SOLR-552, SOLR-544, SOLR-701: Multiple fixes to DateField - regarding lenient parsing of optional milliseconds, and correct - formatting using the canonical representation. LegacyDateField has - been added for people who have come to depend on the existing - broken behavior. (hossman, Stefan Oestreicher) - -30. SOLR-539: Fix for non-atomic long counters and a cast fix to avoid divide - by zero. (Sean Timm via Otis Gospodnetic) - -31. SOLR-514: Added explicit media-type with UTF* charset to *.xsl files that - don't already have one. (hossman) - -32. 
SOLR-505: Give RequestHandlers the possibility to suppress the generation - of HTTP caching headers. (Thomas Peuss via Otis Gospodnetic) - -33. SOLR-553: Handle highlighting of phrase terms better when - hl.usePhraseHighlighter=true URL param is used. - (Bojan Smid via Otis Gospodnetic) - -34. SOLR-590: Limitation in pgrep on Linux platform breaks script-utils fixUser. - (Hannes Schmidt via billa) - -35. SOLR-597: SolrServlet no longer "caches" SolrCore. This was causing - problems in Resin, and could potentially cause problems for customized - usages of SolrServlet. - -36. SOLR-585: Now sets the QParser on the ResponseBuilder (gsingers) - -37. SOLR-604: If the spellchecking path is relative, make it relative to the Solr Data Directory. - (Shalin Shekhar Mangar via gsingers) - -38. SOLR-584: Make stats.jsp and stats.xsl more robust. - (Yousef Ourabi and hossman) - -39. SOLR-443: SolrJ: Declare UTF-8 charset on POSTed parameters - to avoid problems with servlet containers that default to latin-1 - and allow switching of the exact POST mechanism for parameters - via useMultiPartPost in CommonsHttpSolrServer. - (Lars Kotthoff, Andrew Schurman, ryan, yonik) - -40. SOLR-556: multi-valued fields always highlighted in disparate snippets - (Lars Kotthoff via klaas) - -41. SOLR-501: Fix admin/analysis.jsp UTF-8 input for some other servlet - containers such as Tomcat. (Hiroaki Kawai, Lars Kotthoff via yonik) - -42. SOLR-616: SpellChecker accuracy configuration is not applied for FileBasedSpellChecker. - Apply it for FileBasedSpellChecker and IndexBasedSpellChecker both. - (shalin) - -43. SOLR-648: SpellCheckComponent throws NullPointerException on using spellcheck.q request - parameter after restarting Solr, if reload is called but build is not called. - (Jonathan Lee, shalin) - -44. SOLR-598: DebugComponent now always occurs last in the SearchHandler list unless the - components are explicitly declared. (gsingers) - -45. SOLR-676: DataImportHandler should use UpdateRequestProcessor API instead of directly - using UpdateHandler. (shalin) - -46. SOLR-696: Fixed bug in NamedListCodec in regards to serializing Iterable objects. (gsingers) - -47. SOLR-669: snappuller fix for FreeBSD/Darwin (Richard "Trey" Hyde via Otis Gospodnetic) - -48. SOLR-606: Fixed spell check collation offset issue. (Stefan Oestreicher, Geoffrey Young, gsingers) - -49. SOLR-589: Improved handling of badly formatted query strings (Sean Timm via Otis Gospodnetic) - -50. SOLR-749: Allow QParser and ValueSourceParsers to be extended with same name (hossman, gsingers) - -Other Changes - 1. SOLR-135: Moved common classes to org.apache.solr.common and altered the - build scripts to make two jars: apache-solr-1.3.jar and - apache-solr-1.3-common.jar. This common.jar can be used in client code; - it does not have lucene or junit dependencies. The original classes - have been replaced with a @Deprecated extended class and are scheduled - to be removed in a later release. While this change does not affect API - compatibility, it is recommended to update references to these - deprecated classes. (ryan) - - 2. SOLR-268: Tweaks to post.jar so it prints the error message from Solr. - (Brian Whitman via hossman) - - 3. Upgraded to Lucene 2.2.0; June 18, 2007. - - 4. SOLR-215: Static access to SolrCore.getSolrCore() and SolrConfig.config - have been deprecated in order to support multiple loaded cores. - (Henri Biestro via ryan) - - 5. 
SOLR-367: The create method in all TokenFilter and Tokenizer Factories - provided by Solr now declares their specific return types instead of just - using "TokenStream" (hossman) - - 6. SOLR-396: Hooks added to the build system for automatic generation of (stub) - Tokenizer and TokenFilter Factories. - Also: new Factories for all Tokenizers and TokenFilters provided by the - lucene-analyzers-2.2.0.jar -- includes support for German, Chinese, - Russian, Dutch, Greek, Brazilian, Thai, and French. (hossman) - - 7. Upgraded to commons-CSV r609327, which fixes escaping bugs and - introduces new escaping and whitespace handling options to - increase compatibility with different formats. (yonik) - - 8. Upgraded to Lucene 2.3.0; Jan 23, 2008. - - 9. SOLR-451: Changed analysis.jsp to use POST instead of GET, also made the input area a - bit bigger (gsingers) - -10. Upgrade to Lucene 2.3.1 - -11. SOLR-531: Different exit code for rsyncd-start and snappuller if disabled (Thomas Peuss via billa) - -12. SOLR-550: Clarified DocumentBuilder addField javadocs (gsingers) - -13. Upgrade to Lucene 2.3.2 - -14. SOLR-518: Changed luke.xsl to use divs w/css for generating histograms - instead of SVG (Thomas Peuss via hossman) - -15. SOLR-592: Added ShardParams interface and changed several string literals - to references to constants in CommonParams. - (Lars Kotthoff via Otis Gospodnetic) - -16. SOLR-520: Deprecated unused LengthFilter since already core in - Lucene-Java (hossman) - -17. SOLR-645: Refactored SimpleFacetsTest (Lars Kotthoff via hossman) - -18. SOLR-591: Changed Solrj default value for facet.sort to true (Lars Kotthoff via Shalin) - -19. Upgraded to Lucene 2.4-dev (r669476) to support SOLR-572 (gsingers) - -20. SOLR-636: Improve/simplify example configs; and make index.jsp - links more resilient to configs loaded via an InputStream - (Lars Kotthoff, hossman) - -21. SOLR-682: Scripts now support FreeBSD (Richard Trey Hyde via gsingers) - -22. SOLR-489: Added in deprecation comments. (Sean Timm, Lars Kotthoff via gsingers) - -23. SOLR-692: Migrated to stable released builds of StAX API 1.0.1 and StAX 1.2.0 (shalin) -24. Upgraded to Lucene 2.4-dev (r686801) (yonik) -25. Upgraded to Lucene 2.4-dev (r688745) 27-Aug-2008 (yonik) -26. Upgraded to Lucene 2.4-dev (r691741) 03-Sep-2008 (yonik) -27. Replaced the StAX reference implementation with the geronimo - StAX API jar, and the Woodstox StAX implementation. (yonik) - -Build - 1. SOLR-411. Changed the names of the Solr JARs to use the de facto standard JAR names based on - project-name-version.jar. This yields, for example: - apache-solr-common-1.3-dev.jar - apache-solr-solrj-1.3-dev.jar - apache-solr-1.3-dev.jar - - 2. SOLR-479: Added clover code coverage targets for committers and the nightly build. Requires - the Clover library, as licensed to Apache and only available privately. To run: - ant -Drun.clover=true clean clover test generate-clover-reports - - 3. SOLR-510: Nightly release includes client sources. (koji) - - 4. SOLR-563: Modified the build process to build contrib projects - (Shalin Shekhar Mangar via Otis Gospodnetic) - - 5. SOLR-673: Modify build file to create javadocs for core, solrj, contrib and "all inclusive" (shalin) - - 6. SOLR-672: Nightly release includes contrib sources. (Jeremy Hinegardner, shalin) - - 7. SOLR-586: Added ant target and POM files for building maven artifacts of the Solr core, common, - client and contrib. The target can publish artifacts with source and javadocs. 
- (Spencer Crissman, Craig McClanahan, shalin) - -================== Release 1.2, 20070602 ================== - -Upgrading from Solr 1.1 ------------------------------------- -IMPORTANT UPGRADE NOTE: In a master/slave configuration, all searchers/slaves -should be upgraded before the master! If the master were to be updated -first, the older searchers would not be able to read the new index format. - -Older Apache Solr installations can be upgraded by replacing -the relevant war file with the new version. No changes to configuration -files should be needed. - -This version of Solr contains a new version of Lucene implementing -an updated index format. This version of Solr/Lucene can still read -and update indexes in the older formats, and will convert them to the new -format on the first index change. One change in the new index format -is that all "norms" are kept in a single file, greatly reducing the number -of files per segment. Users of compound file indexes will want to consider -converting to the non-compound format for faster indexing and slightly better -search concurrency. - -The JSON response format for facets has changed to make it easier for -clients to retain sorted order. Use json.nl=map explicitly in clients -to get the old behavior, or add it as a default to the request handler -in solrconfig.xml - -The Lucene based Solr query syntax is slightly more strict. -A ':' in a field value must be escaped or the whole value must be quoted. - -The Solr "Request Handler" framework has been updated in two key ways: -First, if a Request Handler is registered in solrconfig.xml with a name -starting with "/" then it can be accessed using path-based URL, instead of -using the legacy "/select?qt=name" URL structure. Second, the Request -Handler framework has been extended making it possible to write Request -Handlers that process streams of data for doing updates, and there is a -new-style Request Handler for XML updates given the name of "/update" in -the example solrconfig.xml. Existing installations without this "/update" -handler will continue to use the old update servlet and should see no -changes in behavior. For new-style update handlers, errors are now -reflected in the HTTP status code, Content-type checking is more strict, -and the response format has changed and is controllable via the wt -parameter. - - - -Detailed Change List -------------------- - -New Features - 1. SOLR-82: Default field values can be specified in the schema.xml. - (Ryan McKinley via hossman) - - 2. SOLR-89: Two new TokenFilters with corresponding Factories... - * TrimFilter - Trims leading and trailing whitespace from Tokens - * PatternReplaceFilter - applies a Pattern to each token in the - stream, replacing match occurrences with a specified replacement. - (hossman) - - 3. SOLR-91: allow configuration of a limit of the number of searchers - that can be warming in the background. This can be used to avoid - out-of-memory errors, or contention caused by more and more searchers - warming in the background. An error is thrown if the limit specified - by maxWarmingSearchers in solrconfig.xml is exceeded. (yonik) - - 4. SOLR-106: New faceting parameters that allow specification of a - minimum count for returned facets (facet.mincount), paging through facets - (facet.offset, facet.limit), and explicit sorting (facet.sort). - facet.zeros is now deprecated. (yonik) (see the sketch after this list) - - 5. SOLR-80: Negative queries are now allowed everywhere. 
Negative queries - are generated and cached as their positive counterpart, speeding - generation and generally resulting in smaller sets to cache. - Set intersections in SolrIndexSearcher are more efficient, - starting with the smallest positive set, subtracting all negative - sets, then intersecting with all other positive sets. (yonik) - - 6. SOLR-117: Limit a field faceting to constraints with a prefix specified - by facet.prefix or f.<field>.facet.prefix. (yonik) - - 7. SOLR-107: JAVA API: Change NamedList to use Java5 generics - and implement Iterable (Ryan McKinley via yonik) - - 8. SOLR-104: Support for "Update Plugins" -- RequestHandlers that want - access to streams of data for doing updates. ContentStreams can come - from the raw POST body, multi-part form data, or remote URLs. - Included in this change is a new SolrDispatchFilter that allows - RequestHandlers registered with names that begin with a "/" to be - accessed using a URL structure based on that name. - (Ryan McKinley via hossman) - - 9. SOLR-126: DirectUpdateHandler2 supports autocommitting after a specified time - (in ms), using <autoCommit><maxTime>10000</maxTime></autoCommit>. - (Ryan McKinley via klaas). - -10. SOLR-116: IndexInfoRequestHandler added. (Erik Hatcher) - -11. SOLR-79: Add system property ${<name>[:<default>]} substitution for - configuration files loaded, including schema.xml and solrconfig.xml. - (Erik Hatcher with inspiration from Andrew Saar) - -12. SOLR-149: Changes to make Solr more easily embeddable, in addition - to logging which request handler handled each request. - (Ryan McKinley via yonik) - -13. SOLR-86: Added standalone Java-based command-line updater. - (Erik Hatcher via Bertrand Delacretaz) - -14. SOLR-152: DisMaxRequestHandler now supports configurable alternate - behavior when q is not specified. A "q.alt" param can be specified - using SolrQueryParser syntax as a mechanism for specifying what query - the dismax handler should execute if the main user query (q) is blank. - (Ryan McKinley via hossman) - -15. SOLR-158: new "qs" (Query Slop) param for DisMaxRequestHandler - allows for specifying the amount of default slop to use when parsing - explicit phrase queries from the user. - (Adam Hiatt via hossman) - -16. SOLR-81: SpellCheckerRequestHandler that uses the SpellChecker from - the Lucene contrib. - (Otis Gospodnetic and Adam Hiatt) - -17. SOLR-182: allow lazy loading of request handlers on first request. - (Ryan McKinley via yonik) - -18. SOLR-81: More SpellCheckerRequestHandler enhancements, including - support for relative or absolute directory path configurations, as - well as RAM based directory. (hossman) - -19. SOLR-197: New parameters for input: stream.contentType for specifying - or overriding the content type of input, and stream.file for reading - local files. (Ryan McKinley via yonik) - -20. SOLR-66: CSV data format for document additions and updates. (yonik) - -21. SOLR-184: add echoHandler=true to responseHeader, support echoParams=all - (Ryan McKinley via ehatcher) - -22. SOLR-211: Added a regex PatternTokenizerFactory. This extracts tokens - from the input string using a regex Pattern. (Ryan McKinley) - -23. SOLR-162: Added a "Luke" request handler and other admin helpers. - This exposes the system status through the standard requestHandler - framework. (ryan) - -24. SOLR-212: Added a DirectSolrConnection class. This lets you access - solr using the standard request/response formats, but does not require - an HTTP connection. It is designed for embedded applications. (ryan) - -25. 
SOLR-204: The request dispatcher (added in SOLR-104) can handle - calls to /select. This offers uniform error handling for /update and - /select. To enable this behavior, you must add: - <requestDispatcher handleSelect="true" /> to your solrconfig.xml - See the example solrconfig.xml for details. (ryan) - -26. SOLR-170: StandardRequestHandler now supports a "sort" parameter. - Using the ';' syntax is still supported, but it is recommended to - transition to the new syntax. (ryan) - -27. SOLR-181: The index schema now supports "required" fields. Attempts - to add a document without a required field will fail, returning a - descriptive error message. By default, the uniqueKey field is - a required field. This can be disabled by setting required=false - in schema.xml. (Greg Ludington via ryan) - -28. SOLR-217: Fields configured in the schema to be neither indexed nor - stored will now be quietly ignored by Solr when Documents are added. - The example schema has a comment explaining how this can be used to - ignore any "unknown" fields. - (Will Johnson via hossman) - -29. SOLR-227: If schema.xml defines multiple fieldTypes, fields, or - dynamicFields with the same name, a severe error will be logged rather - than quietly continuing. Depending on the <abortOnConfigurationError> - settings, this may halt the server. Likewise, if solrconfig.xml - defines multiple RequestHandlers with the same name it will also add - an error. (ryan) - -30. SOLR-226: Added support for dynamic field as the destination of a - copyField using glob (*) replacement. (ryan) - -31. SOLR-224: Adding a PhoneticFilterFactory that uses apache commons codec - language encoders to build phonetically similar tokens. This currently - supports: DoubleMetaphone, Metaphone, Soundex, and RefinedSoundex (ryan) - -32. SOLR-199: new n-gram tokenizers available via NGramTokenizerFactory - and EdgeNGramTokenizerFactory. (Adam Hiatt via yonik) - -33. SOLR-234: TrimFilter can update the Token's startOffset and endOffset - if updateOffsets="true". By default the Token offsets are unchanged. - (ryan) - -34. SOLR-208: new example_rss.xsl and example_atom.xsl to provide more - examples for people about the Solr XML response format and how they - can transform it to suit different needs. - (Brian Whitman via hossman) - -35. SOLR-249: Deprecated SolrException( int, ... ) constructors in favor - of constructors that take an ErrorCode enum. This will ensure that - all SolrExceptions use a valid HTTP status code. (ryan) - -36. SOLR-386: Abstracted SolrHighlighter and moved existing implementation - to DefaultSolrHighlighter. Adjusted SolrCore and solrconfig.xml so - that highlighter is configurable via a class attribute. Allows users - to use their own highlighter implementation. (Tricia Williams via klaas)
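The faceting parameters from SOLR-106 and the facet.prefix from SOLR-117 above map directly onto query parameters. A minimal SolrJ sketch (SolrJ itself arrived in a later release and is used here purely for illustration; the URL and the "cat" field are assumptions):

   import org.apache.solr.client.solrj.SolrQuery;
   import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
   import org.apache.solr.client.solrj.response.FacetField;
   import org.apache.solr.client.solrj.response.QueryResponse;

   public class FacetSketch {
     public static void main(String[] args) throws Exception {
       CommonsHttpSolrServer server =
           new CommonsHttpSolrServer("http://localhost:8983/solr");
       SolrQuery query = new SolrQuery("*:*");
       query.setFacet(true);
       query.addFacetField("cat");        // field to facet on (assumed)
       query.setFacetMinCount(1);         // facet.mincount (SOLR-106)
       query.setFacetLimit(10);           // facet.limit (SOLR-106)
       query.set("facet.offset", "10");   // page through constraints (SOLR-106)
       query.setFacetPrefix("e");         // facet.prefix (SOLR-117)

       QueryResponse rsp = server.query(query);
       FacetField cat = rsp.getFacetField("cat");
       if (cat != null && cat.getValues() != null) {
         for (FacetField.Count c : cat.getValues()) {
           System.out.println(c.getName() + ": " + c.getCount());
         }
       }
     }
   }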
- -Changes in runtime behavior - 1. Highlighting using DisMax will only pick up terms from the main - user query, not boost or filter queries (klaas). - - 2. SOLR-125: Change default of json.nl to flat, change so that - json.nl only affects items where order matters (facet constraint - listings). Fix JSON output bug for null values. Internal JAVA API: - change most uses of NamedList to SimpleOrderedMap. (yonik) - - 3. A new method "getSolrQueryParser" has been added to the IndexSchema - class for retrieving a new SolrQueryParser instance with all options - specified in the schema.xml's <solrQueryParser> block set. The - documentation for the SolrQueryParser constructor and its use of - IndexSchema have also been clarified. - (Erik Hatcher and hossman) - - 4. DisMaxRequestHandler's bq, bf, qf, and pf parameters can now accept - multiple values (klaas). - - 5. Queries are re-written before highlighting is performed. This enables - proper highlighting of prefix and wildcard queries (klaas). - - 6. A meaningful exception is raised when attempting to add a doc missing - a unique id if it is declared in the schema and allowDups=false. - (ryan via klaas) - - 7. SOLR-183: Exceptions with error code 400 are raised when - numeric argument parsing fails. RequiredSolrParams class added - to facilitate checking for parameters that must be present. - (Ryan McKinley, J.J. Larrea via yonik) - - 8. SOLR-179: By default, solr will abort after any severe initialization - errors. This behavior can be disabled by setting: - <abortOnConfigurationError>false</abortOnConfigurationError> - in solrconfig.xml (ryan) - - 9. The example solrconfig.xml maps /update to XmlUpdateRequestHandler using - the new request dispatcher (SOLR-104). This requires posted content to - have a valid contentType: curl -H 'Content-type:text/xml; charset=utf-8' - The response format matches that of /select and returns standard error - codes. To enable solr1.1 style /update, do not map "/update" to any - handler in solrconfig.xml (ryan) (see the sketch after this list) - -10. SOLR-231: If a charset is not specified in the contentType, - ContentStream.getReader() will use UTF-8 encoding. (ryan) - -11. SOLR-230: More options for post.jar to support stdin, xml on the - commandline, and deferring commits. Tutorial modified to take - advantage of these options so there is no need for curl. - (hossman) - -12. SOLR-128: Upgraded Jetty to the latest stable release 6.1.3 (ryan)
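Item 9 above makes a valid contentType mandatory for the new-style /update handler, and item 10 defines the UTF-8 fallback when no charset is given. A minimal sketch of a conforming post using only the JDK (the URL and document are assumptions):

   import java.io.OutputStream;
   import java.net.HttpURLConnection;
   import java.net.URL;

   public class PostUpdate {
     public static void main(String[] args) throws Exception {
       URL url = new URL("http://localhost:8983/solr/update");
       HttpURLConnection con = (HttpURLConnection) url.openConnection();
       con.setDoOutput(true);
       con.setRequestMethod("POST");
       // Declare the charset explicitly; without one the server
       // falls back to UTF-8 when reading the stream (item 10).
       con.setRequestProperty("Content-Type", "text/xml; charset=utf-8");
       OutputStream out = con.getOutputStream();
       out.write("<add><doc><field name=\"id\">1</field></doc></add>"
           .getBytes("UTF-8"));
       out.close();
       System.out.println("HTTP " + con.getResponseCode());
     }
   }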
- -Optimizations - 1. SOLR-114: HashDocSet specific implementations of union() and andNot() - for a 20x performance improvement for those set operations, and a new - hash algorithm speeds up exists() by 10% and intersectionSize() by 8%. - (yonik) - - 2. SOLR-115: Solr now uses BooleanQuery.clauses() instead of - BooleanQuery.getClauses() in any situation where there is no risk of - modifying the original query. - (hossman) - - 3. SOLR-221: Speed up sorted faceting on multivalued fields by ~60% - when the base set consists of a relatively large portion of the - index. (yonik) - - 4. SOLR-221: Added a facet.enum.cache.minDf parameter which avoids - using the filterCache for terms that match few documents, trading - decreased memory usage for increased query time. (yonik) - -Bug Fixes - 1. SOLR-87: Parsing of synonym files did not correctly handle escaped - whitespace such as \r\n\t\b\f. (yonik) - - 2. SOLR-92: DOMUtils.getText (used when parsing config files) did not - work properly with many DOM implementations when dealing with - "Attributes". (Ryan McKinley via hossman) - - 3. SOLR-9,SOLR-99: Tighten up sort specification error checking, throw - exceptions for missing sort specifications or a sort on a non-indexed - field. (Ryan McKinley via yonik) - - 4. SOLR-145: Fix for bug introduced in SOLR-104 where some Exceptions - were being ignored by all "out of the box" RequestHandlers. (hossman) - - 5. SOLR-166: JNDI solr.home code refactoring. SOLR-104 moved - some JNDI related code to the init method of a Servlet Filter - - according to the Servlet Spec, all Filter's should be initialized - prior to initializing any Servlets, but this is not the case in at - least one Servlet Container (Resin). This "bug fix" refactors - this JNDI code so that it should be executed the first time any - attempt is made to use the solr.home dir. - (Ryan McKinley via hossman) - - 6. SOLR-173: Bug fix to SolrDispatchFilter to reduce the "too many open - files" problem; the cause was that SolrDispatchFilter was not closing requests - when finished. Also modified ResponseWriters to only fetch a Searcher - reference if necessary for writing out DocLists. - (Ryan McKinley via hossman) - - 7. SOLR-168: Fix display positioning of multiple tokens at the same - position in analysis.jsp (yonik) - - 8. SOLR-167: The SynonymFilter sometimes generated incorrect offsets when - multi token synonyms were matched in the source text. (yonik) - - 9. SOLR-188: bin scripts do not support non-default webapp names. Added "-U" - option to specify a full path to the update url, overriding the - "-h" (hostname), "-p" (port) and "-w" (webapp name) parameters. - (Jeff Rodenburg via billa) - -10. SOLR-198: RunExecutableListener always waited for the process to - finish, even when wait="false" was set. (Koji Sekiguchi via yonik) - -11. SOLR-207: Changed distribution scripts to remove recursive find - and avoid use of "find -maxdepth" on platforms where it is not - supported. (yonik) - -12. SOLR-222: Changing writeLockTimeout in solrconfig.xml did not - change the effective timeout. (Koji Sekiguchi via yonik) - -13. Changed the SOLR-104 RequestDispatcher so that /select?qt=xxx can not - access handlers that start with "/". This makes path based authentication - possible for path based request handlers. (ryan) - -14. SOLR-214: Some servlet containers (including Tomcat and Resin) do not - obey the specified charset. Rather than letting the container handle - it, solr now uses the charset from the header contentType to decode posted - content. Using the contentType: "text/xml; charset=utf-8" will force - utf-8 encoding. If you do not specify a contentType, it will use the - platform default. (Koji Sekiguchi via ryan) - -15. SOLR-241: Undefined system properties used in configuration files now - cause a clear message to be logged rather than an obscure exception thrown. - (Koji Sekiguchi via ehatcher) - -Other Changes - 1. Updated to Lucene 2.1 - - 2. Updated to Lucene 2007-05-20_00-04-53 - -================== Release 1.1.0, 20061222 ================== - -Status ------- -This is the first release since Solr joined the Incubator, and brings many -new features and performance optimizations including highlighting, -faceted browsing, and JSON/Python/Ruby response formats. - - -Upgrading from previous Solr versions -------------------------------------- -Older Apache Solr installations can be upgraded by replacing -the relevant war file with the new version. No changes to configuration -files are needed and the index format has not changed. - -The default version of the Solr XML response syntax has been changed to 2.2. -Behavior can be preserved for those clients not explicitly specifying a -version by adding a default to the request handler in solrconfig.xml - -By default, Solr will no longer use a searcher that has not fully warmed, -and requests will block in the meantime. To change back to the previous -behavior of using a cold searcher in the event there is no other -warm searcher, see the useColdSearcher config item in solrconfig.xml - -The XML response format when adding multiple documents to the collection -in a single command has changed to return a single <result>. - - -Detailed Change List -------------------- - -New Features - 1. added support for setting Lucene's positionIncrementGap - 2. Admin: new statistics for SolrIndexSearcher - 3. Admin: caches now show config params on stats page - 3. 
max() function added to FunctionQuery suite - 4. postOptimize hook, mirroring the functionality of the postCommit hook, - but only called on an index optimize. - 5. Ability to HTTP POST query requests to /select in addition to HTTP-GET - 6. The default search field may now be overridden by requests to the - standard request handler using the df query parameter. (Erik Hatcher) - 7. Added DisMaxRequestHandler and SolrPluginUtils. (Chris Hostetter) - 8. Support for customizing the QueryResponseWriter per request - (Mike Baranczak / SOLR-16 / hossman) - 9. Added KeywordTokenizerFactory (hossman) -10. copyField accepts dynamicfield-like names as the source. - (Darren Erik Vengroff via yonik, SOLR-21) -11. new DocSet.andNot(), DocSet.andNotSize() (yonik) -12. Ability to store term vectors for fields. (Mike Klaas via yonik, SOLR-23) -13. New abstract BufferedTokenStream for people who want to write - Tokenizers or TokenFilters that require arbitrary buffering of the - stream. (SOLR-11 / yonik, hossman) -14. New RemoveDuplicatesTokenFilter - useful in situations where - synonyms, stemming, or word-delimiter-ing produce identical tokens at - the same position. (SOLR-11 / yonik, hossman) -15. Added highlighting to SolrPluginUtils and implemented in StandardRequestHandler - and DisMaxRequestHandler (SOLR-24 / Mike Klaas via hossman,yonik) -16. SnowballPorterFilterFactory language is configurable via the "language" - attribute, with the default being "English". (Bertrand Delacretaz via yonik, SOLR-27) -17. ISOLatin1AccentFilterFactory, instantiates ISOLatin1AccentFilter to remove accents. - (Bertrand Delacretaz via yonik, SOLR-28) -18. JSON, Python, Ruby QueryResponseWriters: use wt="json", "python" or "ruby" - (yonik, SOLR-31) -19. Make web admin pages return UTF-8, change Content-type declaration to include a - space between the mime-type and charset (Philip Jacob, SOLR-35) -20. Made query parser default operator configurable via schema.xml: - <solrQueryParser defaultOperator="AND|OR"/> - The default operator remains "OR". -21. JAVA API: new version of SolrIndexSearcher.getDocListAndSet() which takes - flags (Greg Ludington via yonik, SOLR-39) -22. A HyphenatedWordsFilter, a text analysis filter used during indexing to rejoin - words that were hyphenated and split by a newline. (Boris Vitez via yonik, SOLR-41) -23. Added a CompressableField base class which allows fields of derived types to - be compressed using the compress=true setting. The field type also gains the - ability to specify a size threshold at which field data is compressed. - (klaas, SOLR-45) -24. Simple faceted search support for fields (enumerating terms) - and arbitrary queries added to both StandardRequestHandler and - DisMaxRequestHandler. (hossman, SOLR-44) -25. In addition to specifying default RequestHandler params in the - solrconfig.xml, support has been added for configuring values to be - appended to the multi-val request params, as well as for configuring - invariant params that can not be overridden in the query. (hossman, SOLR-46) -26. Default operator for query parsing can now be specified with q.op=AND|OR - from the client request, overriding the schema value. (ehatcher) -27. New XSLTResponseWriter does server side XSLT processing of XML Response. - In the process, an init(NamedList) method was added to QueryResponseWriter - which works the same way as SolrRequestHandler. - (Bertrand Delacretaz / SOLR-49 / hossman) -28. 
json.wrf parameter adds a wrapper-function around the JSON response, - useful in AJAX with dynamic script tags for specifying a JavaScript - callback function. (Bertrand Delacretaz via yonik, SOLR-56) -29. autoCommit can be specified every so many documents added (klaas, SOLR-65) -30. ${solr.home}/lib directory can now be used for specifying "plugin" jars - (hossman, SOLR-68) -31. Support for "Date Math" relative "NOW" when specifying values of a - DateField in a query -- or when adding a document. - (hossman, SOLR-71) (see the sketch below) -32. useColdSearcher control in solrconfig.xml prevents the first searcher - from being used before it's done warming. This can help prevent - thrashing on startup when multiple requests hit a cold searcher. - The default is "false", preventing use before warm. (yonik, SOLR-77) - -Changes in runtime behavior - 1. classes reorganized into different packages, package names changed to Apache - 2. force read of document stored fields in QuerySenderListener - 3. Solr now looks in ./solr/conf for config, ./solr/data for data - configurable via solr.solr.home system property - 4. Highlighter params changed to be prefixed with "hl."; allow fragmentsize - customization and per-field overrides on many options - (Andrew May via klaas, SOLR-37) - 5. Default param values for DisMaxRequestHandler should now be specified - using a '<lst name="defaults">...</lst>' init param; for backwards - compatibility all init params will be used as defaults if an init param - with that name does not exist. (hossman, SOLR-43) - 6. The DisMaxRequestHandler now supports multiple occurrences of the "fq" - param. (hossman, SOLR-44) - 7. FunctionQuery.explain now uses ComplexExplanation to provide more - accurate score explanations when composed in a BooleanQuery. - (hossman, SOLR-25) - 8. Document update handling locking is much sparser, allowing performance gains - through multiple threads. Large commits also might be faster (klaas, SOLR-65) - 9. Lazy field loading can be enabled via a solrconfig directive. This will be faster when - not all stored fields are needed from a document (klaas, SOLR-52) -10. Made admin JSPs return XML and transform them with new XSL stylesheets - (Otis Gospodnetic, SOLR-58) -11. If the "echoParams=explicit" request parameter is set, request parameters are copied - to the output. In an XML output, they appear in a new list inside - the new response header element, which replaces the old one. - Adding a version=2.1 parameter to the request produces the old format, for backwards - compatibility (bdelacretaz and yonik, SOLR-59). - -Optimizations - 1. getDocListAndSet can now generate both a DocList and a DocSet from a - single lucene query. - 2. BitDocSet.intersectionSize(HashDocSet) no longer generates an intermediate - set - 3. OpenBitSet completed, replaces BitSet as the implementation for BitDocSet. - Iteration is faster, and BitDocSet.intersectionSize(BitDocSet) and unionSize - is between 3 and 4 times faster. (yonik, SOLR-15) - 4. much faster unionSize when one of the sets is a HashDocSet: O(smaller_set_size) - 5. Optimized getDocSet() for term queries resulting in a 36% speedup of facet.field - queries where DocSets aren't cached (for example, if the number of terms in the field - is larger than the filter cache.) (yonik) - 6. Optimized facet.field faceting by as much as 500 times when the field has - a single token per document (not multiValued & not tokenized) by using the - Lucene FieldCache entry for that field to tally term counts. The first request - utilizing the FieldCache will take longer than subsequent ones. 
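The Date Math entry (31) above is easiest to see in an actual query. A minimal sketch using the later SolrJ client, purely for illustration (the URL and the timestamp field name are assumptions):

   import org.apache.solr.client.solrj.SolrQuery;
   import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;

   public class DateMathSketch {
     public static void main(String[] args) throws Exception {
       CommonsHttpSolrServer server =
           new CommonsHttpSolrServer("http://localhost:8983/solr");
       SolrQuery query = new SolrQuery("*:*");
       // SOLR-71 date math: everything indexed in the last 7 days,
       // with NOW rounded down to the start of the day.
       query.addFilterQuery("timestamp:[NOW/DAY-7DAYS TO NOW]");
       System.out.println(server.query(query).getResults().getNumFound());
     }
   }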
- -Bug Fixes - 1. Fixed delete-by-id for field types whose indexed form is different - from the printable form (mainly sortable numeric types). - 2. Added escaping of attribute values in the XML response (Erik Hatcher) - 3. Added empty extractTerms() to FunctionQuery to enable use in - a MultiSearcher (Yonik) - 4. WordDelimiterFilter sometimes lost token positionIncrement information - 5. Fix reverse sorting for fields where sortMissingFirst=true - (Rob Staveley, yonik) - 6. Worked around a Jetty bug that caused invalid XML responses for fields - containing non ASCII chars. (Bertrand Delacretaz via yonik, SOLR-32) - 7. WordDelimiterFilter can throw exceptions if configured with both - generate and catenate off. (Mike Klaas via yonik, SOLR-34) - 8. Escape '>' in XML output (because ]]> is illegal in CharData) - 9. field boosts weren't being applied and doc boosts were being applied to fields (klaas) -10. Multiple-doc update generates well-formed xml (klaas, SOLR-65) -11. Better parsing of pingQuery from solrconfig.xml (hossman, SOLR-70) -12. Fixed bug with "Distribution" page introduced when Versions were - added to "Info" page (hossman) -13. Fixed HTML escaping issues with user input to analysis.jsp and action.jsp - (hossman, SOLR-74) - -Other Changes - 1. Upgrade to Lucene 2.0 nightly build 2006-06-22, lucene SVN revision 416224, - http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=markup&pathrev=416224 - 2. Modified admin styles to improve display in Internet Explorer (Greg Ludington via billa, SOLR-6) - 3. Upgrade to Lucene 2.0 nightly build 2006-07-15, lucene SVN revision 422302, - 4. Included unique key field name/value (if available) in log message of add (billa, SOLR-18) - 5. Updated to Lucene 2.0 nightly build 2006-09-07, SVN revision 462111 - 6. Added javascript to catch empty query in admin query forms (Tomislav Nakic-Alfirevic via billa, SOLR-48) - 7. backslash escape * in ssh command used in snappuller for zsh compatibility, SOLR-63 - 8. check solr return code in admin scripts, SOLR-62 - 9. Updated to Lucene 2.0 nightly build 2006-11-15, SVN revision 475069 -10. Removed src/apps containing the legacy "SolrTest" app (hossman, SOLR-3) -11. Simplified index.jsp and form.jsp, primarily by removing/hiding XML - specific params, and adding an option to pick the output type. (hossman) -12. Added new numeric build property "specversion" to allow clean - MANIFEST.MF files (hossman) -13. Added Solr/Lucene versions to "Info" page (hossman) -14. Explicitly set mime-type of .xsl files in web.xml to - application/xslt+xml (hossman) -15. 
Config parsing should now work using DOM Level 2 parsers -- Solr - previously relied on getTextContent which is a DOM Level 3 addition - (Alexander Saar via hossman, SOLR-78) - -2006/01/17 Solr open sourced, moves to Apache Incubator diff -Nru solr-1.4.0+ds1/client/javascript/README.txt solr-3.3/client/javascript/README.txt --- solr-1.4.0+ds1/client/javascript/README.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/javascript/README.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ -For a Solr JavaScript Client, see: -http://evolvingweb.github.com/ajax-solr/ - -For information on (now deprecated) SolrJS, see: -http://wiki.apache.org/solr/SolrJS \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/python/README.txt solr-3.3/client/python/README.txt --- solr-1.4.0+ds1/client/python/README.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/python/README.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -Note: As of version 1.3, Solr no longer comes bundled with a Python client. The existing client -was not sufficiently maintained or tested as development of Solr progressed, and committers -felt that the code was not up to our usual high standards of release. - -The client bundled with previous versions of Solr will continue to be available indefinitely at: -http://svn.apache.org/viewvc/lucene/solr/tags/release-1.2.0/client/python/ - -Please see http://wiki.apache.org/solr/SolPython for information on third-party Solr python -clients. diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/CHANGES.yml solr-3.3/client/ruby/solr-ruby/CHANGES.yml --- solr-1.4.0+ds1/client/ruby/solr-ruby/CHANGES.yml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/CHANGES.yml 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -v0.0.8: - release_date: TBD - changes: - - Updated Solr::Request::Standard to use modern style sort parameter - -v0.0.7: - release_date: 2009-03-06 - changes: - - Fixed string parameters with carriage returns in Solr::Request::Select (due to String.each pain) - - SOLR-1047 - added support for facet.method - -v0.0.6: - release_date: 2008-07-14 - changes: - - Added Solr::Request::Spellcheck - - Enabled Solr::Request::Select to work as a general pass through to any registered request handler - - Fixed modify_document_test.rb so as to not be brittle with Hash ordering - - Added support for alternate field highlighting to Solr::Request::Standard (and thus DisMax) - - Added facet.offset support to Solr::Request::Standard/Dismax - - Added shards parameter to Solr::Request::Standard/Dismax - -v0.0.5: - release_date: 2007-08-27 - changes: - - Added support for highlighter fragment size to Solr::Request::Standard - - Added support for MoreLikeThese to Solr::Request::Standard - - Added Solr::Request::ModifyDocument (requires SOLR-139 patch) - - Added Solr::Util.query_parser_escape() - -v0.0.4: - release_date: 2007-08-16 - changes: - - Solr::Indexer#solr added to gain access to the Solr::Connection instance - - Fixed issue with multi-line String field values when field set multiValued="false" - - Fixed tests to work without either Hpricot or libxml2 - -v0.0.3: - release_date: 2007-05-22 - changes: - - Adjusted HpricotMapper and XPathMapper, and tests, to load only if their dependencies are available. 
- -v0.0.2: - release_date: 2007-05-15 - changes: - - mappers, etc - -v0.0.1: - release_date: 2007-02-15 - changes: - - initial release diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/examples/delicious_library/dl_importer.rb solr-3.3/client/ruby/solr-ruby/examples/delicious_library/dl_importer.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/examples/delicious_library/dl_importer.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/examples/delicious_library/dl_importer.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -#!/usr/bin/env ruby -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# TODO: distill common shell script needs into common file for parsing parameters for Solr URL, input filename, -debug, etc -# script/runner or script/console-like, from Rails. A data mapper would be a great generalizable piece. - -require 'solr' - -solr_url = ENV["SOLR_URL"] || "http://localhost:8983/solr" -dl_filename = ARGV[0] -debug = ARGV[1] == "-debug" - -if dl_filename == nil - puts "You must pass a filename as an option." - exit -end - -source = Solr::Importer::DelimitedFileSource.new(dl_filename) - -# Exported column names -# medium,associatedURL,boxHeightInInches,boxLengthInInches,boxWeightInPounds,boxWidthInInches, -# scannednumber,upc,asin,country,title,fullTitle,series,numberInSeries,edition,aspect,mediacount, -# genre,price,currentValue,language,netrating,description,owner,publisher,published,rare,purchaseDate,rating, -# used,signed,hasExperienced,notes,location,paid,condition,notowned,author,illustrator,pages -mapping = { - :id => Proc.new {|data| data[:upc].empty? ? 
data[:asin] : data[:upc]}, - :medium_facet => :medium, - :country_facet => :country, - :signed_facet => :signed, - :rating_facet => :netrating, - :language_facet => :language, - :genre_facet => Proc.new {|data| data[:genre].split('/').map {|s| s.strip}}, - :title_text => :title, - :full_title_text => :fullTitle, - :asin_display => :asin, - :notes_text => :notes, - :publisher_facet => :publisher, - :description_text => :description, - :author_text => :author, - :pages_text => :pages, - :published_year_facet => Proc.new {|data| data[:published].scan(/\d\d\d\d/)[0]} -} - -indexer = Solr::Indexer.new(source, mapping, :debug => debug) -indexer.index do |record, solr_document| - # can modify solr_document before it is indexed here -end - -indexer.solr.commit unless debug -indexer.solr.optimize unless debug \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/examples/delicious_library/sample_export.txt solr-3.3/client/ruby/solr-ruby/examples/delicious_library/sample_export.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/examples/delicious_library/sample_export.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/examples/delicious_library/sample_export.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,164 +0,0 @@ -medium associatedURL boxHeightInInches boxLengthInInches boxWeightInPounds boxWidthInInches scannednumber upc asin country title fullTitle series numberInSeries edition aspect mediacount genre price currentValue language netrating description owner publisher published rare purchaseDate rating used signed hasExperienced notes location paid condition notowned platform key developer esrbrating players author illustrator pages director stars features mpaarating theatricalDate minutes artist conductor tracks tracklisting -game 0711719721123 B00006Z7HU us ATV Offroad Fury 2 ATV Offroad Fury 2 Video Game/ CD-ROM Online/ Bike/ Video Games $19.99 $4.40 4 ATV Offroad Fury 2 provides expansive off-road racing gameplay packed with more courses, modes, tricks, and ATVs, plus online gameplay via the network adapter to heighten the racing experience. Players will choose from more than 20 licensed and team-sponsored ATVs from top-tier manufacturers and off-road sponsors. Every featured ATV will be true to spec, allowing for realistic handling and reactions in every situation. Sony Computer Entertainment 20-03-2003 07-02-2007 5 PlayStation2 Teen -book 9780966075007 0966075005 us Chinese Characters: A Genealogy and Dictionary Chinese Characters: A Genealogy and Dictionary Paperback Chinese/ Polyglot/ Dictionaries; Polyglot $19.95 $18.00 4.5 This dictionary is designed to help students understand, appreciate and remember Chinese characters. It has the following features: -Every character entry includes a brief traditional Chinese etymology. -Genealogical charts highlight the connections between characters, showing the creation of more than 4000 characters from less than 200 simple pictographs and ideographs. -Mandarin standards in China and Taiwan are distinguished. -Simplified forms for each character are given. -Character entries list all words which use the character in any position, allowing a word to be found even if the first character is unknown. -English definitions are referenced in an English-Chinese index. -A word pronunciation index allows students to directly search for an overheard word without having to guess the initial character. -A stroke count index lists every character by number of strokes. 
Zhongwen.Com 01-08-1998 07-02-2007 550 -book 9780195840971 0195840976 us Concise English-Chinese Chinese-English Dictionary Concise English-Chinese Chinese-English Dictionary Paperback English (All)/ Chinese/ Linguistics $12.95 $3.00 3.5 With nearly 20,000 entries in each, this bilingual dictionary is ideal for travelers and students of Chinese or English. Among the dictionary's many features are:/ *Simplified Chinese characters as well as Pinyin/ romanization / *Pronunciation using international phonetic symbols/ *Numerous examples of usage in both languages/ *Appendices including consonants and vowels of the/ Chinese phonetic alphabet, and names and / abbreviations of China's provinces, regions,/ and municipalities/ *Handy pocket-sized format Oxford University Press, USA 01-07-1994 11-02-2007 1114 -movie 0025192022425 0783226985 us Dragon: The Bruce Lee Story Dragon: The Bruce Lee Story DVD Biography/ Drama/ Action & Adventure/ Documentary $14.98 $5.62 4 This enjoyable and touching biography of martial-arts film star Bruce Lee stars Jason Scott Lee (no relation), an actor with a lively face and natural intensity, who makes every moment of this film compelling. Directed by Rob Cohen, Dragon traces Bruce Lee's slow rise over myriad obstacles--most of them race-based--to become an international superstar in films. Lee's origins are oddly set in San Francisco instead of his real home in Seattle, but then again there is plenty of artistic license going on as Cohen explores the actor's psyche through some powerful fantasy sequences. Lauren Holly is good as Lee's wife, Linda (whose book about her late husband inspired this movie). A scene involving Bruce's rescue of son Brandon (who died in a filmmaking accident in 1993) from a murderous spirit is plain spooky. The special-edition DVD release has a widescreen presentation, director interview, featurette, screen tests, closed captioning, optional French soundtrack, and optional Spanish subtitles. --Tom Keogh Universal Studios 01-07-1998 07-02-2007 Aki Aleong/ Eric Bruskotter/ John Cheung/ Chao Li Chi/ Sam Hau/ Lauren Holly/ Clyde Kusatsu/ Nancy Kwan/ Michael Learned/ Jason Scott Lee/ Kay Tong Lim/ Sterling Macer Jr./ Iain M. Parker/ Ong Soo Han/ Michelle Tennant/ Sven-Ole Thorsen/ Robert Wagner/ Luoyong Wang/ Ric Young Closed-captioned/ Color/ Letterboxed/ Widescreen/ NTSC/ 2.35:1 PG-13 07-05-1993 120 -movie 0018111924795 B0009IW92A us The Essential Sherlock Holmes The Essential Sherlock Holmes DVD 8 Mystery/ Drama/ Mystery & Suspense/ Sherlock Holmes $29.99 $8.95 3.5 Dressed to Kill Terror By Night The Woman in Green Sherlock Holmes And the Secret Weapon A Study in Scarlet Silver Blaze Sherlock Holmes TV Series Delta 07-06-2005 09-02-2007 Essential Sherlock Holmes Box set/ Color/ NTSC NR -book 0073999151893 1569221863 us Guitar Scale Guru: The Scale Book - Your Guide for Success! Guitar Scale Guru: The Scale Book - Your Guide for Success! Paperback Guitar/ Instruction & Study/ Techniques $14.95 $9.46 4.5 All of the essential diagrams, drawings and information concerning theory, scales, and their uses in one easy-to-use book! Covers the five essential scales for today's guitarists - major, major pentatonic, minor, minor pentatonic and blues - with a unique Linking System that makes it easy to understand scales like a pro! 
Creative Concepts 01-02-2000 11-02-2007 168 -music 0019028394855 B000CC2XP6 us Light Up Ahead Light Up Ahead Audio CD $11.95 4 Track Listing: 1.going too 2.comin' after you 3.on the beam 4.dont know what 5.hide 6.signs 7.lower voice 8.bad side 9.light up ahead TR Music 07-02-1995 07-02-2007 5 1 -music 0800314886006 B000B7PU66 us Parallel Universe Parallel Universe Audio CD 2 $15.26 Contains Parallel Universe CD plus bonus Invisible Pagan Underdogs 13 track CD. 23 total tracks. TR Music 07-02-2005 07-02-2007 5 1 -book 9781556152115 1556152116 us Programmers at Work: Interviews With 19 Programmers Who Shaped the Computer Industry (Tempus) Programmers at Work: Interviews With 19 Programmers Who Shaped the Computer Industry (Tempus) Paperback Careers/ Compilers $9.95 $19.92 5 Tempus Books 07-02-1989 07-02-2007 400 -book 9781904978367 1904978363 us Time Out Amsterdam (Time Out Amsterdam Guide) Time Out Amsterdam (Time Out Amsterdam Guide) Paperback Guidebooks/ Amsterdam/ Time Out Travel $19.95 $0.99 3.5 Europe's most infamous city remains one of its most popular, and not without good reason: between its world-class art museums, its eminent canals that are perfect for wandering, and its coffee shops that don't exactly specialize in coffee, its variety is glorious indeed. However, with one of Europe's more forward-thinking cultural scenes and striking new architectural developments in IJburg and the Bijlmermeer, there's much more here to enjoy than the clichés; written, researched and edited entirely by locals, the Time Out Amsterdam guide tells travelers all about it. Highlights include Amsterdam after dark - the best restaurants, bars, and nightclubs in the city; an unmatched section on the city's cultural scene including galleries, performance art, classical music, and theater; and trips beyond Amsterdam to the flower auction in Aalsmeer, the cheese market at Gouda, and the windmills of Alblasserdam. Time Out Publishing 10-07-2005 18-02-2007 320 -book 9780865470804 0865470804 us Taking the Path of Zen (Taking the Path of Zen Ppr) Taking the Path of Zen (Taking the Path of Zen Ppr) Paperback Zen $12.00 $4.40 4 There is a fine art to presenting complex ideas with simplicity and insight, in a manner that both guides and inspires. In Taking the Path of Zen Robert Aitken presents the practice, lifestyle, rationale, and ideology of Zen Buddhism with remarkable clarity. / / The foundation of Zen is the practice of zazen, or mediation, and Aitken Roshi insists that everything flows from the center. He discusses correct breathing, posture, routine, teacher-student relations, and koan study, as well as common problems and milestones encountered in the process. Throughout the book the author returns to zazen, offering further advice and more advanced techniques. The orientation extends to various religious attitudes and includes detailed discussions of the Three Treasures and the Ten Precepts of Zen Buddhism./ Taking the Path of Zen will serve as orientation and guide for anyone who is drawn to the ways of Zen, from the simply curious to the serious Zen student. / North Point Press 01-01-1982 06-02-2007 Robert Aitken 150 -book 9780963177513 0963177516 us Bubishi: Martial Art Spirit Bubishi: Martial Art Spirit Paperback Martial Arts $34.95 $61.18 5 Bubishi-Martial Art Spirit is the secret karate text of the Okinawan Masters. Guarded for centuries, this mystical book has finally been completely translated into English. 
The BUBISHI was cherished by Miyagi Chojun, the founder of Goju Ryu, Funakoshi Gichin, founder of Shotokan and Mabuni Kenwa, founder of Shito Ryu karate. It includes Dim Mak (The Death Touch), pressure points, knockout and killing techniques , 48 essential self defense applications, Chinese cures for Martial arts injuries and much more!!! Yamazato Pubns 12-02-1993 06-02-2007 George Alexander -book 9780142000281 0142000280 us Getting Things Done: The Art of Stress-Free Productivity Getting Things Done: The Art of Stress-Free Productivity Paperback Health & Stress/ Time Management/ Guides/ Labor & Industrial Relations/ Motivational/ Office Skills/ Creativity/ Self-Esteem/ Stress Management $15.00 $7.01 4.5 With first-chapter allusions to martial arts, "flow,""mind like water," and other concepts borrowed from the East (and usually mangled), you'd almost think this self-helper from David Allen should have been called Zen and the Art of Schedule Maintenance./ Not quite. Yes, Getting Things Done offers a complete system for downloading all those free-floating gotta-do's clogging your brain into a sophisticated framework of files and action lists--all purportedly to free your mind to focus on whatever you're working on. However, it still operates from the decidedly Western notion that if we could just get really, really organized, we could turn ourselves into 24//7 productivity machines. (To wit, Allen, whom the New Economy bible Fast Company has dubbed "the personal productivity guru," suggests that instead of meditating on crouching tigers and hidden dragons while you wait for a plane, you should unsheathe that high-tech saber known as the cell phone and attack that list of calls you need to return.)/ As whole-life-organizing systems go, Allen's is pretty good, even fun and therapeutic. It starts with the exhortation to take every unaccounted-for scrap of paper in your workstation that you can't junk, The next step is to write down every unaccounted-for gotta-do cramming your head onto its own scrap of paper. Finally, throw the whole stew into a giant "in-basket"/ That's where the processing and prioritizing begin; in Allen's system, it get a little convoluted at times, rife as it is with fancy terms, subterms, and sub-subterms for even the simplest concepts. Thank goodness the spine of his system is captured on a straightforward, one-page flowchart that you can pin over your desk and repeatedly consult without having to refer back to the book. That alone is worth the purchase price. Also of value is Allen's ingenious Two-Minute Rule: if there's anything you absolutely must do that you can do right now in two minutes or less, then do it now, thus freeing up your time and mind tenfold over the long term. It's commonsense advice so obvious that most of us completely overlook it, much to our detriment; Allen excels at dispensing such wisdom in this useful, if somewhat belabored, self-improver aimed at everyone from CEOs to soccer moms (who we all know are more organized than most CEOs to start with). 
--Timothy Murphy/ Penguin (Non-Classics) 31-12-2002 03-02-2007 David Allen 267 -book 0076092024163 0131422464 us Core J2EE Patterns: Best Practices and Design Strategies, Second Edition Core J2EE Patterns: Best Practices and Design Strategies, Second Edition Hardcover Qualifying Textbooks - Winter 2007 $54.99 $23.99 5 Prentice Hall Ptr 10-05-2003 07-02-2007 1 Deepak Alur/ Dan Malks/ John Crupi 650 -book 9780471202820 0471202827 us Agile Modeling: Effective Practices for Extreme Programming and the Unified Process Agile Modeling: Effective Practices for Extreme Programming and the Unified Process Paperback Object-Oriented Design/ Software Development/ Quality Control/ Computers & Internet/ Qualifying Textbooks - Winter 2007 $34.99 $24.39 3.5 The first book to cover Agile Modeling, a new modeling technique created specifically for XP projects. eXtreme Programming (XP) has created a buzz in the software development community-much like Design Patterns did several years ago. Although XP presents a methodology for faster software development, many developers find that XP does not allow for modeling time, which is critical to ensure that a project meets its proposed requirements. They have also found that standard modeling techniques that use the Unified Modeling Language (UML) often do not work with this methodology. In this innovative book, Software Development columnist Scott Ambler presents Agile Modeling (AM)-a technique that he created for modeling XP projects using pieces of the UML and Rational's Unified Process (RUP). Ambler clearly explains AM, and shows readers how to incorporate AM, UML, and RUP into their development projects with the help of numerous case studies integrated throughout the book./ • AM was created by the author for modeling XP projects-an element lacking in the original XP design/ • The XP community and its creator have embraced AM, which should give this book strong market acceptance/ Companion Web site at www.agilemodeling.com features updates, links to XP and AM resources, and ongoing case studies about agile modeling./ John Wiley & Sons 01-02-2001 07-02-2007 Scott W. Ambler/ Ron Jeffries 224 -book 9780262012102 0262012103 us A Semantic Web Primer (Cooperative Information Systems) A Semantic Web Primer (Cooperative Information Systems) Hardcover Web Site Design/ Storage/ Internet $42.00 $25.98 4.5 The development of the Semantic Web, with machine-readable content, has the potential to revolutionize the World Wide Web and its use. A Semantic Web Primer provides an introduction and guide to this emerging field, describing its key ideas, languages, and technologies. Suitable for use as a textbook or for self-study by professionals, it concentrates on undergraduate-level fundamental concepts and techniques that will enable readers to proceed with building applications on their own. It includes exercises, project descriptions, and annotated references to relevant online materials. A Semantic Web Primer is the only available book on the Semantic Web to include a systematic treatment of the different languages (XML, RDF, OWL, and rules) and technologies (explicit metadata, ontologies, and logic and inference) that are central to Semantic Web development. 
The book also examines such crucial related topics as ontology engineering and application scenarios./ / After an introductory chapter, topics covered in succeeding chapters include XML and related technologies that support semantic interoperability; RDF and RDF Schema, the standard data model for machine-processable semantics; OWL, the W3C-approved standard for a Web ontology language more extensive than RDF Schema; rules, both monotonic and nonmonotonic, in the framework of the Semantic Web; selected application domains and how the Semantic Web would benefit them; the development of ontology-based systems; and current debates on key issues and predictions for the future. The MIT Press 01-04-2004 07-02-2007 Grigoris Antoniou/ Frank van Harmelen 272 -book 0076092016335 0130674826 us A Practical Guide to eXtreme Programming A Practical Guide to eXtreme Programming Paperback Software Development/ Software Engineering $49.99 $4.38 4 Prentice Hall PTR 08-02-2002 08-02-2007 David Astels/ Granville Miller/ Miroslav Novak 384 -book 9780961454739 0961454733 us Art & Fear Art & Fear Paperback Study & Teaching/ Criticism $12.95 $7.25 4.5 "This is a book about making art. Ordinary art. Ordinary art means something like: all art not made by Mozart. After all, art is rarely made by Mozart-like people; essentially-statistically speaking-there aren't any people like that. Geniuses get made once a century or so, yet good art gets made all the time, so to equate the making of art with the workings of genius removes this intimately human activity to a strangely unreachable and unknowable place. For all practical purposes making art can be examined in great detail without ever getting entangled in the very remote problems of genius."
--from the Introduction/ Art & Fear explores the way art gets made, the reasons it often doesn't get made, and the nature of the difficulties that cause so many artists to give up along the way. The book's co-authors, David Bayles and Ted Orland, are themselves both working artists, grappling daily with the problems of making art in the real world. Their insights and observations, drawn from personal experience, provide an incisive view into the world of art as it is experienced by artmakers themselves./ This is not your typical self-help book. This is a book written by artists, for artists -- it's about what it feels like when artists sit down at their easel or keyboard, in their studio or performance space, trying to do the work they need to do. First published in 1994, Art & Fear quickly became an underground classic. Word-of-mouth response alone-now enhanced by internet posting-has placed it among the best-selling books on artmaking and creativity nationally./ Art & Fear has attracted a remarkably diverse audience, ranging from beginning to accomplished artists in every medium, and including an exceptional concentration among students and teachers. The original Capra Press edition of Art & Fear sold 80,000 copies./ An excerpt:/ Today, more than it was however many years ago, art is hard because you have to keep after it so consistently. On so many different fronts. For so little external reward. Artists become veteran artists only by making peace not just with themselves, but with a huge range of issues. You have to find your work.../ Image Continuum Press 01-04-2001 03-02-2007 David Bayles/ Ted Orland 122 -book 0785342616415 0201616416 us Extreme Programming Explained: Embrace Change Extreme Programming Explained: Embrace Change Paperback Software Development/ Software Engineering $29.95 $2.98 4 Kent Beck's eXtreme Programming eXplained provides an intriguing high-level overview of the author's Extreme Programming (XP) software development methodology. Written for IS managers, project leaders, or programmers, this guide provides a glimpse at the principles behind XP and its potential advantages for small- to mid-size software development teams./ The book intends to describe what XP is, its guiding principles, and how it works. Simply written, the book avoids case studies and concrete details in demonstrating the efficacy of XP. Instead, it demonstrates how XP relies on simplicity, unit testing, programming in pairs, communal ownership of code, and customer input on software to motivate code improvement during the development process. As the author notes, these principles are not new, but when they're combined their synergy fosters a new and arguably better way to build and maintain software. Throughout the book, the author presents and explains these principles, such as "rapid feedback" and "play to win," which form the basis of XP./ Generally speaking, XP changes the way programmers work. The book is good at delineating new roles for programmers and managers whom Beck calls "coaches." The most striking characteristic of XP is that programmers work in pairs, and that testing is an intrinsic part of the coding process. In a later section, the author even shows where XP works and where it doesn't and offers suggestions for migrating teams and organizations over to the XP process./ In the afterword, the author recounts the experiences that led him to develop and refine XP, an insightful section that should inspire any organization to adopt XP. 
This book serves as a useful introduction to the philosophy and practice of XP for the manager or programmer who wants a potentially better way to build software. --Richard Dragan/ Topics covered: Extreme Programming (XP) software methodology, principles, XP team roles, facilities design, testing, refactoring, the XP software lifecycle, and adopting XP./ Addison-Wesley Professional 05-10-1999 07-02-2007 Kent Beck 224 -book 0785342146530 0321146530 us Test Driven Development: By Example (Addison-Wesley Signature Series) Test Driven Development: By Example (Addison-Wesley Signature Series) Paperback Software Development/ Testing/ Software Engineering/ Qualifying Textbooks - Winter 2007 $44.99 $28.75 4 Addison-Wesley Professional 08-11-2002 07-02-2007 Kent Beck 240 -book 9780743245517 0743245512 us A Tooth from the Tiger's Mouth: How to Treat Your Injuries with Powerful Healing Secrets of the Great Chinese Warrior (Fireside Books (Fireside)) A Tooth from the Tiger's Mouth: How to Treat Your Injuries with Powerful Healing Secrets of the Great Chinese Warrior (Fireside Books (Fireside)) Paperback Healing/ Herbal Remedies/ Chinese Medicine $14.00 $8.25 5 A renowned expert in Chinese sports medicine and martial arts reveals ancient Eastern secrets for healing common injuries, including sprains, bruises, deep cuts, and much more./ For centuries, Chinese martial arts masters have kept their highly prized remedies as carefully guarded secrets, calling such precious and powerful knowledge "a tooth from the tiger's mouth." Now, for the first time, these deeply effective methods are revealed to Westerners who want alternative ways to treat the acute and chronic injuries experienced by any active person./ While many books outline the popular teachings of traditional Chinese medicine, only this one offers step-by-step instructions for treating injuries. Expert practitioner and martial artist Tom Bisio explains the complete range of healing strategies and provides a Chinese first-aid kit to help the reader fully recover from every mishap: cuts, sprains, breaks, dislocations, bruises, muscle tears, tendonitis, and much more./ He teaches readers how to:/ / • Examine and diagnose injuries/ • Prepare and apply herbal formulas/ • Assemble a portable kit for emergencies/ • Fully recuperate with strengthening exercises and healing dietary advice/ Comprehensive and easy to follow, with drawings to illustrate both the treatment strategies and the strengthening exercises, this unique guidebook will give readers complete access to the powerful healing secrets of the great Chinese warriors./ Fireside 05-10-2004 06-02-2007 Tom Bisio 384 -book 9781932394696 1932394699 us Ruby for Rails: Ruby Techniques for Rails Developers Ruby for Rails: Ruby Techniques for Rails Developers Paperback/ Illustrated Web Site Design/ Object-Oriented Design/ Transportation & Highway $44.95 $24.50 4 -The word is out: with Ruby on Rails you can build powerful Web applications easily and quickly! And just like the Rails framework itself, Rails applications are Ruby programs. That means you can't tap into the full power of Rails unless you master the Ruby language./ Ruby for Rails, written by Ruby expert David Black (with a foreword by David Heinemeier Hansson), helps Rails developers achieve Ruby mastery. Each chapter deepens your Ruby knowledge and shows you how it connects to Rails. You'll gain confidence working with objects and classes and learn how to leverage Ruby's elegant, expressive syntax for Rails application power. 
And you'll become a better Rails developer through a deep understanding of the design of Rails itself and how to take advantage of it./ Newcomers to Ruby will find a Rails-oriented Ruby introduction that's easy to read and that includes dynamic programming techniques, an exploration of Ruby objects, classes, and data structures, and many neat examples of Ruby and Rails code in action. Ruby for Rails: the Ruby guide for Rails developers!/ What's Inside/ Classes, modules, and objects/ Collection handling and filtering/ String and regular expression manipulation/ Exploration of the Rails source code/ Ruby dynamics/ Many more programming concepts and techniques!/ Manning Publications 11-05-2006 07-02-2007 David Black 532 -book 0021898130853 0020130856 us The Elements of Technical Writing (Elements of Series) The Elements of Technical Writing (Elements of Series) Paperback Writing Skills/ General & Reference/ Technical $9.95 $4.49 3.5 Longman 19-12-2000 11-02-2007 Gary Blake/ Robert W. Bly 192 -book 0785342310054 0201310058 us Effective Java Programming Language Guide Effective Java Programming Language Guide Paperback Qualifying Textbooks - Winter 2007 $49.99 $29.95 5 Written for the working Java developer, Joshua Bloch's Effective Java Programming Language Guide provides a truly useful set of over 50 best practices and tips for writing better Java code. With plenty of advice from an indisputable expert in the field, this title is sure to be an indispensable resource for anyone who wants to get more out of their code./ As a veteran developer at Sun, the author shares his considerable insight into the design choices made over the years in Sun's own Java libraries (which the author acknowledges haven't always been perfect). Based on his experience working with Sun's best minds, the author provides a compilation of 57 tips for better Java code organized by category. Many of these ideas will let you write more robust classes that better cooperate with built-in Java APIs. Many of the tips make use of software patterns and demonstrate an up-to-the-minute sense of what works best in today's design. Each tip is clearly introduced and explained with code snippets used to demonstrate each programming principle./ Early sections on creating and destroying objects show you ways to make better use of resources, including how to avoid duplicate objects. Next comes an absolutely indispensable guide to implementing "required" methods for custom classes. This material will help you write new classes that cooperate with old ones (with advice on implementing essential requirements like the equals() and hashCode() methods)./ The author has a lot to say about class design, whether using inheritance or composition. Tips on designing methods show you how to create understandable, maintainable, and robust classes that can be easily reused by others on your team. Sections on mapping C code (like structures, unions, and enumerated types) onto Java will help C programmers bring their existing skills to Sun's new language. Later sections delve into some general programming tips, like using exceptions effectively. The book closes with advice on using threads and synchronization techniques, plus some worthwhile advice on object serialization./ Whatever your level of Java knowledge, this title can make you a more effective programmer. 
Wise, yet never pompous or doctrinaire, the author has succeeded in packaging some really valuable nuggets of advice into a concise and very accessible guidebook that arguably deserves a place on most any developer's bookshelf. --Richard Dragan/ Topics covered:/ • Best practices and tips for Java/ • Creating and destroying objects (static factory methods, singletons, avoiding duplicate objects and finalizers)/ • Required methods for custom classes (overriding equals(), hashCode(), toString(), clone(), and compareTo() properly)/ • Hints for class and interface design (minimizing class and member accessibility, immutability, composition versus inheritance, interfaces versus abstract classes, preventing subclassing, static versus nonstatic classes)/ • C constructs in Java (structures, unions, enumerated types, and function pointers in Java)/ • Tips for designing methods (parameter validation, defensive copies, method signatures, method overloading, zero-length arrays, hints for Javadoc comments)/ • General programming advice (local variable scope, using Java API libraries, avoiding float and double for exact comparisons, when to avoid strings, string concatenation, interfaces and reflection, avoid native methods, optimizing hints, naming conventions)/ • Programming with exceptions (checked versus run-time exceptions, standard exceptions, documenting exceptions, failure-capture information, failure atomicity)/ • Threading and multitasking (synchronization and scheduling hints, thread safety, avoiding thread groups)/ • Serialization (when to implement Serializable, the readObject(), and readResolve() methods)/ Prentice Hall PTR 05-06-2001 07-02-2007 Joshua Bloch 252 -book 9780596000523 0596000529 us Creating Applications with Mozilla Creating Applications with Mozilla Paperback/ Illustrated Web Browsers/ Web Programming $39.95 $3.97 2.5 Mozilla is not just a browser. Mozilla is also a framework that allows developers to create cross-platform applications. This framework is made up of JavaScript, CSS (Cascading Style Sheets), and Mozilla's XUL (XML-based User-interface Language) as well as the Gecko rendering engine, XBL (eXtensible Binding Language), XPCOM (Mozilla's component model), and several other components. Creating Applications with Mozilla explains how applications are created with Mozilla and provides step-by-step information about how you can create your own programs using Mozilla's powerful cross-platform development framework. This book also shows examples of many different types of existing applications to demonstrate some of the possibilities of Mozilla application development. One of Mozilla's biggest advantages for a developer is that Mozilla-based applications are cross-platform, meaning programs work the same on Windows as they do on Linux or the Mac OS. Working through the book, you are introduced to the Mozilla development environment and after installing Mozilla, you quickly learn to create simple applications. After the initial satisfaction of developing your own portable applications, the book branches into topics on modular development and packaging your application. In order to build more complex applications, coverage of XUL, JavaScript, and CSS allow you to discover how to customize and build out your application shell. The second half of the book explores more advanced topics including UI enhancement, localization, and remote distribution. Mozilla 1.0 was released on June 5th, 2002, after more than four years of development as an open source project. 
This book has been written so that all of the information and examples will work with this release and any of the 1.0.x maintenance releases. In addition to Netscape's Mozilla-based browsers (Netscape 6.x and 7.x), the Mozilla framework has been used to create other browsers such as Galeon and Chimera, and chat clients such as ChatZilla and JabberZilla. Developers have also used Mozilla to create games, development tools, browser enhancements, as well as all sorts of other types of applications. O'Reilly Media 09-02-2002 07-02-2007 David Boswell/ Brian King/ Ian Oeschger/ Pete Collins/ Eric Murphy 480 -book 0031869008357 1558538356 us Life's Little Instruction Book 511 Suggestions, Observations, And Reminders On How To Live A Happy And Rewarding Life Life's Little Instruction Book 511 Suggestions, Observations, And Reminders On How To Live A Happy And Rewarding Life Paperback Ethics & Morality/ New Age/ Gifts/ Collections & Readers/ Spiritual $6.99 $1.70 5 H. Jackson Brown, Jr. originally wrote Life's Little Instruction Book™ as a gift for his son who was leaving home to begin his freshman year in college. Brown says, "I read years ago that it was not the responsibility of parents to pave the road for their children but to provide a road map, and I wanted to provide him with what I had learned about living a happy and rewarding life." Life's Little Instruction Book™ is a guidebook that gently points the way to happiness and fulfillment. The observations are direct, simple, and as practical as an umbrella./ "But it's not just for young people," says Brown. "Most of us already know how to live a successful and purposeful life. We know we should be more understanding and thoughtful, more responsible, courageous and appreciative. It's just that we sometimes need reminding." Life's Little Instruction Book™ is that reminder, as well as the perfect gift for a relative or a friend who needs encouragement at any time of the year./ • Never give up on anybody. Miracles happen every day./ • Be brave. Even if you're not, pretend to be. No one can tell the difference./ • Think big thoughts, but relish small pleasures./ • Learn to listen. Opportunity sometimes knocks softly./ • Never deprive someone of hope; it might be all they have./ • Be kinder than necessary./ • Become the most positive and enthusiastic person you know./ • Commit yourself to constant self-improvement./ • Don't major in minor things./ • Never cut what can be untied./ Since its debut in 1991, Life's Little Instruction Book™ has revolutionized the publishing industry. This little plaid book, which has been embraced the world over, has sold more than nine million copies, spent more than two years atop the New York Times bestseller list, and has been translated into 33 languages. Though originally written as a gift from a father to a son, its simple message has been enjoyed by men and women of all ages around the world./ Rutledge Hill Press 29-09-2000 08-02-2007 H. Jackson Brown -book 0037038174434 1580174434 us The Qigong Year The Qigong Year Hardcover Meditation/ Mental & Spiritual Healing/ Chinese Medicine/ Energy Healing/ Tai Chi & Qi Gong $12.95 $0.98 3 Closely related to the popular Chinese martial art Tai Chi, Qigong (pronounced "chee gong") is an ancient self-healing art that combines movement and meditation in holistic workouts that simultaneously develop body and spirit, promoting overall health and vitality./ Practiced by millions of Chinese for thousands of years, Qigong is now gaining popularity throughout the world. 
The program of exercises, movements, breathing techniques, and visualizations in The Qigong Year is specially designed to mirror the flow of the seasons. The exercises are illustrated with instructive line drawings, and the book features elegant duotones and patterned art accented with gold metallic ink./ Combining vigorous and gentle movement, Qigong exercises help strengthen the body, improve posture, align the spine, and relax shoulder and neck muscles. Three variations of each exercise - seated, standing, and advanced - are described, enabling people of all ages and abilities to easily practice and enjoy the benefits of Qigong./ Qigong meditation can help stabilize moods - reducing anger, anxiety, and depression - and improve outlook and self-confidence, making it easier to cope with life's challenges. The powerful affirmation technique, based on repeating positive statements that relate to personal problems or desires, can be practiced anywhere - while driving to work, taking a shower, or doing housework. Includes examples of effective affirmations, as well as guidelines for generating personalized ones to target individual health, work, and relationship challenges./ / Storey Publishing, LLC 15-05-2002 06-02-2007 Michael Bruney 256 -book 9780385494717 0385494718 us The Hidden Connections: Integrating The Biological, Cognitive, And Social Dimensions Of Life Into A Science Of Sustainability The Hidden Connections: Integrating The Biological, Cognitive, And Social Dimensions Of Life Into A Science Of Sustainability Hardcover Modern/ History $24.95 $2.00 4.5 The author of the bestselling The Tao of Physics and The Web of Life explores the profound social implications of emerging scientific principles and provides an innovative framework for using them to understand and solve some of the most important issues of our time./ / For most of history, scientific investigation was based on linear thinking. But the 1980s brought a revolutionary change. With the advent of improved computer power, scientists could apply complexity theory--nonlinear thinking--to scientific processes far more easily than ever before. Physicist Fritjof Capra was at the forefront of the revolution, and in The Web of Life he extended its scope by showing the impact of complexity theory on living organisms. In The Hidden Connections he breaks through another frontier, this time applying the principles of complexity theory to an analysis of the broad sphere of all human interactions./ / Capra posits that in order to sustain life in the future, the principles underlying our social institutions must be consistent with the organization that nature has evolved to sustain the "web of life." In a lucid and convincing argument, Capra explains how the theoretical ideas of science can be applied to the practical concerns of our time. Covering every aspect of human nature and society, he discusses such vital matters as the management of human organizations, the challenges and dangers of economic globalization, and the nature and the problems of biotechnology. He concludes with an authoritative, often provocative plan for designing ecologically sustainable communities and technologies as alternatives to the current economic globalization./ / A brilliant, incisive examination of the relationship between science and our social systems, The Hidden Connections will spark enormous debate in the scientific community and inspire us to think about the future of humanity in a new way. 
Doubleday 20-08-2002 11-02-2007 Fritjof Capra 320 -book 9781570625190 1570625190 us The Tao of Physics The Tao of Physics Paperback Taoism $16.95 $7.95 4 First published in 1975, The Tao of Physics rode the wave of fascination with exotic East Asian philosophies. Decades later, it still stands up to scrutiny, explicating not only Eastern philosophies but also how modern physics forces us into conceptions that have remarkable parallels. Covering over 3,000 years of widely divergent traditions across Asia, Capra can't help but blur lines in his generalizations. But the big picture is enough to see the value in them of experiential knowledge, the limits of objectivity, the absence of foundational matter, the interrelation of all things and events, and the fact that process is primary, not things. Capra finds the same notions in modern physics. Those approaching Eastern thought from a background of Western science will find reliable introductions here to Hinduism, Buddhism, and Taoism and learn how commonalities among these systems of thought can offer a sort of philosophical underpinning for modern science. And those approaching modern physics from a background in Eastern mysticism will find precise yet comprehensible descriptions of a Western science that may reinvigorate a hope in the positive potential of scientific knowledge. Whatever your background, The Tao of Physics is a brilliant essay on the meeting of East and West, and on the invaluable possibilities that such a union promises. --Brian Bruya Shambhala 04-01-2000 11-02-2007 Fritjof Capra 366 -book 9780553346107 0553346105 us Uncommon Wisdom Uncommon Wisdom Paperback Modern $27.00 $0.20 5 Bantam 01-01-1989 11-02-2007 Fritjof Capra 334 -book 9780385476751 0385476752 us The Web of Life The Web of Life Hardcover Chaos & Systems/ Acoustics & Sound/ System Theory/ Ecology $23.95 $5.49 4 The vitality and accessibility of Fritjof Capra's ideas have made him perhaps the most eloquent spokesperson of the latest findings emerging at the frontiers of scientific, social, and philosophical thought. In his international bestsellers The Tao of Physics and The Turning Point, he juxtaposed physics and mysticism to define a new vision of reality. In The Web of Life, Capra takes yet another giant step, setting forth a new scientific language to describe interrelationships and interdependence of psychological, biological, physical, social, and cultural phenomena--the "web of life."/ / / / During the past twenty-five years, scientists have challenged conventional views of evolution and the organization of living systems and have developed new theories with revolutionary philosophical and social implications. Fritjof Capra has been at the forefront of this revolution. In The Web of Life, Capra offers a brilliant synthesis of such recent scientific breakthroughs as the theory of complexity, Gaia theory, chaos theory, and other explanations of the properties of organisms, social systems, and ecosystems. Capra's surprising findings stand in stark contrast to accepted paradigms of mechanism and Darwinism and provide an extraordinary new foundation for ecological policies that will allow us to build and sustain communities without diminishing the opportunities for future generations./ / / / Now available in paperback for the first time, The Web of Life is cutting-edge science writing in the tradition of James Gleick's Chaos, Gregory Bateson's Mind and Matter, and Ilya Prigogine's Order Out of Chaos./ / / From the Trade Paperback edition. 
DoubleDay 01-09-1996 11-02-2007 Fritjof Capra 368 -book 9780880222785 0880222786 us DBase III Plus Advanced Programming DBase III Plus Advanced Programming Paperback dBASE $22.95 $1.85 Que Corporation, U.S. 02-02-1987 07-02-2007 Joseph-David Carrabis 300 -book 9780810807303 0810807300 us Building library collections, Building library collections, Unknown Binding Library Management $0.99 Scarecrow Press 08-02-1974 08-02-2007 Mary Duncan Carter 415 -book 9780894070174 0894070177 us The Book of Internal Exercises The Book of Internal Exercises Hardcover $12.95 $0.01 4 Strawberry Hill Pr 12-02-1978 11-02-2007 Stephen Thomas Chang 138 -book 9781416915546 1416915540 us The I Chong: Meditations from the Joint The I Chong: Meditations from the Joint Hardcover Entertainers/ Criminals $23.95 $9.00 4.5 / Beloved stoner comedian TOMMY CHONG is now older, wiser, and officially an EX-CON./ / / On the morning of February 24, 2003, agents of the U.S. Drug Enforcement Administration launched a sting called Operation Pipe Dreams and forced themselves through the door of Tommy's California home, with automatic weapons drawn. As a result of the raid on his home; the simultaneous ransacking of his son's company, Chong Glass; and the Bush administration's determination to make an example out of the "Pope of Pot," he was sentenced to nine months in prison because his company shipped bongs to a head shop in Pennsylvania that was a front for the DEA./ / / Well . . . now it's Tommy Chong's turn to fight back and tell his side of the story./ / / Beginning with Tommy's experiences growing up in Canada in the forties and fifties as a mixed-race kid and going on to become a comedy legend, The I Chong is at once a memoir, a spiritual exploration of his time in prison, and a political indictment of the eroding civil liberties in post-9//11 American society. He tells the unbelievable story of his trip down the rabbit hole of America's war on drugs and of his experiences in the federal prison system, and he offers up timely observations on combating the conservative political forces at work in this country. Introspective, inspiring, and incendiary, The I Chong is a unique chronicle of one man's life and how his humorous and spiritual point of view saved him during his wrongful incarceration at the hands of an administration without boundaries./ Simon Spotlight Entertainment 08-08-2006 10-02-2007 Tommy Chong 224 -book 9780974514031 0974514039 us Pragmatic Project Automation: How to Build, Deploy, and Monitor Java Apps Pragmatic Project Automation: How to Build, Deploy, and Monitor Java Apps Paperback/ Illustrated Software Development/ Software Project Management $29.95 $15.65 4.5 Forget wizards; you need a slave--someone to do your repetitive, tedious and boring tasks, without complaint and without pay, so you'll have more time to design and write exciting code. Indeed, that's what computers are for. You can enlist your own computer to automate all of your project's repetitive tasks, ranging from individual builds and running unit tests through to full product release, customer deployment, and monitoring the system. Many teams try to do these tasks by hand. That's usually a really bad idea: people just aren't as good at repetitive tasks as machines. You run the risk of doing it differently the one time it matters, on one machine but not another, or doing it just plain wrong. But the computer can do these tasks for you the same way, time after time, without bothering you. 
You can transform these labor-intensive, boring and potentially risky chores into automatic, background processes that just work. In this eagerly anticipated book, you'll find a variety of popular, open-source tools to help automate your project. With this book, you will learn:/ • How to make your build processes accurate, reliable, fast, and easy./ • How to build complex systems at the touch of a button./ • How to build, test, and release software automatically, with no human intervention./ • Technologies and tools available for automation: which to use and when./ • Tricks and tips from the masters (do you know how to have your cell phone tell you that your build just failed?)/ You'll find easy-to-implement recipes to automate your Java project, using the same popular style as the rest of our Jolt Productivity Award-winning Starter Kit books. Armed with plenty of examples and concrete, pragmatic advice, you'll find it's easy to get started and reap the benefits of modern software development. You can begin to enjoy pragmatic, automatic, unattended software production that's reliable and accurate every time. The Pragmatic Programmers 08-02-2004 07-02-2007 Mike Clark 176 -book 9780912381039 0912381035 us Traditional Acupuncture: The Law of the Five Elements Traditional Acupuncture: The Law of the Five Elements Paperback Acupuncture & Acupressure/ Massage/ Pharmacology/ Chinese Medicine $16.00 $11.29 4 Traditional Acupuncture Institute, Incorporat 10-02-1994 11-02-2007 Dianne M. Connelly 192 -book 9780262032933 0262032937 us Introduction to Algorithms, Second Edition Introduction to Algorithms, Second Edition Hardcover Beginner's Guides/ Information Systems/ Qualifying Textbooks - Winter 2007 $82.00 $56.99 4 Aimed at any serious programmer or computer science student, the new second edition of Introduction to Algorithms builds on the tradition of the original with a truly magisterial guide to the world of algorithms. Clearly presented, mathematically rigorous, and yet approachable even for the math-averse, this title sets a high standard for a textbook and reference to the best algorithms for solving a wide range of computing problems./ With sample problems and mathematical proofs demonstrating the correctness of each algorithm, this book is ideal as a textbook for classroom study, but its reach doesn't end there. The authors do a fine job of explaining each algorithm. (Reference sections on basic mathematical notation will help readers bridge the gap, but it will help to have some math background to appreciate the full achievement of this handsome hardcover volume.) Every algorithm is presented in pseudo-code, which can be implemented in any computer language, including C//C++ and Java. This ecumenical approach is one of the book's strengths. When it comes to sorting and common data structures, from basic linked lists to trees (including binary trees, red-black, and B-trees), this title really shines, with clear diagrams that show algorithms in operation. Even if you just glance over the mathematical notation here, you can definitely benefit from this text in other ways./ The book moves forward with more advanced algorithms that implement strategies for solving more complicated problems (including dynamic programming techniques, greedy algorithms, and amortized analysis). Algorithms for graphing problems (used in such real-world business problems as optimizing flight schedules or flow through pipelines) come next. 
In each case, the authors provide the best from current research in each topic, along with sample solutions./ This text closes with a grab bag of useful algorithms including matrix operations and linear programming, evaluating polynomials, and the well-known Fast Fourier Transformation (FFT) (useful in signal processing and engineering). Final sections on "NP-complete" problems, like the well-known traveling salesman problem, show that while not all problems have a demonstrably final and best answer, algorithms that generate acceptable approximate solutions can still be used to generate useful, real-world answers./ Throughout this text, the authors anchor their discussion of algorithms with current examples drawn from molecular biology (like the Human Genome Project), business, and engineering. Each section ends with short discussions of related historical material, often discussing original research in each area of algorithms. On the whole, they argue successfully that algorithms are a "technology" just like hardware and software that can be used to write better software that does more, with better performance. Along with classic books on algorithms (like Donald Knuth's three-volume set, The Art of Computer Programming), this title sets a new standard for compiling the best research in algorithms. For any experienced developer, regardless of their chosen language, this text deserves a close look for extending the range and performance of real-world software. --Richard Dragan/ Topics covered: Overview of algorithms (including algorithms as a technology); designing and analyzing algorithms; asymptotic notation; recurrences and recursion; probabilistic analysis and randomized algorithms; heapsort algorithms; priority queues; quicksort algorithms; linear time sorting (including radix and bucket sort); medians and order statistics (including minimum and maximum); introduction to data structures (stacks, queues, linked lists, and rooted trees); hash tables (including hash functions); binary search trees; red-black trees; augmenting data structures for custom applications; dynamic programming explained (including assembly-line scheduling, matrix-chain multiplication, and optimal binary search trees); greedy algorithms (including Huffman codes and task-scheduling problems); amortized analysis (the accounting and potential methods); advanced data structures (including B-trees, binomial and Fibonacci heaps, representing disjoint sets in data structures); graph algorithms (representing graphs, minimum spanning trees, single-source shortest paths, all-pairs shortest paths, and maximum flow algorithms); sorting networks; matrix operations; linear programming (standard and slack forms); polynomials and the Fast Fourier Transformation (FFT); number theoretic algorithms (including greatest common divisor, modular arithmetic, the Chinese remainder theorem, RSA public-key encryption, primality testing, integer factorization); string matching; computational geometry (including finding the convex hull); NP-completeness (including sample real-world NP-complete problems and their insolvability); approximation algorithms for NP-complete problems (including the traveling salesman problem); reference sections for summations and other mathematical notation, sets, relations, functions, graphs and trees, as well as counting and probability backgrounder (plus geometric and binomial distributions)./ The MIT Press 01-09-2001 07-02-2007 Thomas H. Cormen/ Charles E. Leiserson/ Ronald L. 
Rivest/ Clifford Stein 1184 -book 9781932394610 1932394613 us Ajax in Action Ajax in Action Paperback/ Illustrated HTML - General/ Internet/ Qualifying Textbooks - Winter 2007 $44.95 $17.97 4 Val's Blog "A tremendously useful field guide specifically written for developers down in the trenches...waiting for the killer solution..."/ Web users are getting tired of the traditional web experience. They get frustrated losing their scroll position; they get annoyed waiting for refresh; they struggle to reorient themselves on every new page. And the list goes on. With asynchronous JavaScript and XML, known as "Ajax," you can give them a better experience. Once users have experienced an Ajax interface, they hate to go back. Ajax is a new way of thinking that can result in a flowing and intuitive interaction with the user./ Ajax in Action helps you implement that thinking--it explains how to distribute the application between the client and the server (hint: use a "nested MVC" design) while retaining the integrity of the system. You will learn how to ensure your app is flexible and maintainable, and how good, structured design can help avoid problems like browser incompatibilities. Along the way it helps you unlearn many old coding habits. Above all, it opens your mind to the many advantages gained by placing much of the processing in the browser. If you are a web developer who has prior experience with web technologies, this book is for you./ Manning Publications 01-10-2005 07-02-2007 Dave Crane/ Eric Pascarello/ Darren James 650 -book 9780802065193 0802065198 us Design with Type Design with Type Paperback Typography/ Manufacturing/ General & Reference/ Qualifying Textbooks - Winter 2007 $23.95 $20.00 5 Design with Type takes the reader through a study of typography that starts with the individual letter and proceeds through the word, the line, and the mass of text. The contrasts possible with type are treated in detail, along with their applications to the typography of books, advertising, magazines, and information data. The various contending schools of typography are discussed, copiously illustrated with the author's selection of over 150 examples of imaginative typography from many parts of the world./ Design with Type differs from all other books on typography in that it discusses type as a design material as well as a means of communication: the premise is that if type is understood in terms of design, the user of type will be better able to work with it to achieve maximum legibility and effectiveness, as well as aesthetic pleasure. Everyone who uses type, everyone who enjoys the appearance of the printed word, will find Design with Type informative and fascinating. It provides, too, an outstanding example of the effectiveness of imaginative and tasteful typographic design./ University of Toronto Press 14-06-2000 11-02-2007 Carl Dair 162 -book 0676251833904 0804833907 us Making Out in Chinese (Making Out (Tuttle)) Making Out in Chinese (Making Out (Tuttle)) Paperback Chinese/ Phrasebooks - General $7.95 $4.00 3.5 Tuttle Publishing 11-02-2003 11-02-2007 Ray Daniels 96 -book 9780062502230 0062502239 us 365 Tao: Daily Meditations 365 Tao: Daily Meditations Paperback Taoism/ New Age/ Prayerbooks/ Meditations $15.95 $2.99 5 Umbrella, light, landscape, sky— 
There is no language of the holy. 
The sacred lies in the ordinary./ This treasury of life-enhancing daily readings turns a wise Taoist light on every facet of life. Each daily entry begins with a one-word title and its Chinese character in elegant calligraphy./ A brief, poetic aphorism provides the theme, followed by a clear, insightful meditation on the day's Taoist principle./ HarperSanFrancisco 17-07-1992 08-02-2007 Ming-Dao Deng 400 -book 9780460874113 046087411X us A Discourse on Method (Everyman's Library (Paper)) A Discourse on Method (Everyman's Library (Paper)) Paperback Classics/ History, 17th & 18th Century/ Modern/ Methodology & Statistics/ Applied/ Geometry & Topology/ Meteorology/ Experiments, Instruments & Measurement $7.95 $3.95 4.5 By calling everything into doubt, Descartes laid the foundations of modern philosophy. He deduced that human beings consist of minds and bodies; that these are totally distinct "substances"; that God exists and that He ensures we can trust the evidence of our senses. Ushering in the "scientific revolution" of Galileo and Newton, Descartes' ideas swept aside ancient and medieval traditions of philosophical methods and investigation. Tuttle Publishing 11-02-2007 Rene Descartes 300 -book 9780140286786 0140286780 us Your Money or Your Life: Transforming Your Relationship with Money and Achieving Financial Independence Your Money or Your Life: Transforming Your Relationship with Money and Achieving Financial Independence Paperback Public Finance/ Financial Planning/ Money Management/ Contemporary $15.00 $5.00 4.5 There's a big difference between "making a living" and making a life. Do you spend more than you earn? Does making a living feel more like making a dying? Do you dislike your job but can't afford to leave it? Is money fragmenting your time, your relationships with family and friends? If so, Your Money or Your Life is for you./ From this inspiring book, learn how to/ • get out of debt and develop savings/ • reorder material priorities and live well for less/ • resolve inner conflicts between values and lifestyles/ • convert problems into opportunities to learn new skills/ • attain a wholeness of livelihood and lifestyle/ • save the planet while saving money/ • and much more/ Penguin (Non-Classics) 01-09-1999 11-02-2007 Joe Dominguez/ Vicki Robin 400 -book 9780262541480 0262541483 us The Scheme Programming Language, 3rd Edition The Scheme Programming Language, 3rd Edition Paperback Qualifying Textbooks - Winter 2007 $37.00 $22.98 5 This thoroughly updated edition of The Scheme Programming Language provides an introduction to Scheme and a definitive reference for standard Scheme, presented in a clear and concise manner. Written for professionals and students with some prior programming experience, it begins by leading the programmer gently through the basics of Scheme and continues with an introduction to some of the more advanced features of the language. Many exercises are presented to help reinforce the lessons learned, and answers to the exercises are given in a new appendix. Most of the remaining chapters are dedicated to the reference material, which describes in detail the standard features of Scheme included in the Revised Report on Scheme and the ANSI//IEEE standard for Scheme./ / Numerous examples are presented throughout the introductory and reference portions of the text, and a unique set of extended example programs and applications, with additional exercises, are presented in the final chapter. 
Reinforcing the book's utility as a reference text are appendixes that present the formal syntax of Scheme, a summary of standard forms and procedures, and a bibliography of Scheme resources. The Scheme Programming Language stands alone as an introduction to and essential reference for Scheme programmers. It is also useful as a supplementary text for any course that uses Scheme./ / The Scheme Programming Language is illustrated by artist Jean-Pierre Hebert, who writes Scheme programs to extend his ability to create sophisticated works of digital art. The MIT Press 01-10-2003 08-02-2007 R. Kent Dybvig 329 -book 9780874775136 0874775132 us Drawing on the right side of the Brain Drawing on the right side of the Brain Paperback Reference/ Study & Teaching/ Creativity/ Drawing $15.95 $1.08 4.5 Tarcher 01-05-1989 11-02-2007 Betty Edwards 254 -book 9780912111193 0912111194 us Grasping the Wind (Paradigm Title) Grasping the Wind (Paradigm Title) Paperback Acupuncture & Acupressure/ Reference/ Pharmacology $28.95 $19.75 4 Point names, the traditional means for identifying acupoints, have meanings that are, like the wind, hard to grasp. Yet enfolded in these often poetic words is a utility that involves the complex associations derived from the evolution of the Chinese language and the vast array of therapeutic analogies found in traditional medical works./ In discussing the point names, the authors examine the meaning, context, and significance of each acupuncture point to promote understanding of the point's use in acupuncture practice. Guidelines for understanding the nature and structure of the Chinese language are offered, along with discussions of the Chinese rationale for naming points and point groupings. The reasoning for selecting the English names is offered so that readers may adapt the names for their own use. Each of the 363 points covered is listed according to the system currently used in China. Descriptions include the name in Chinese characters, in Pinyin, and in English. The classical location according to major Chinese texts, the associated point groupings, an explanation of point functions, and classical energetic associations are also noted. Further detail is provided by inclusion of channel relationships, five-phase relationships, and qi functions. Additional notes detail linguistic and practical considerations that have accrued to the point over time. Alternate names for the point are given, again in Chinese, Pinyin, and English. Indexes provide stroke order listings, point group names, and point lists for each character. A glossary of all the characters used in point names provides a definition for each Chinese character. This book provides much valuable theoretical and therapeutic information./ Paradigm Publications (MA) 06-02-1989 11-02-2007 Andrew Ellis/ Nigel Wiseman/ Ken Boss 462 -book 9780064539142 0064539148 us Anatomy Coloring Book Anatomy Coloring Book Paperback Anatomy/ Fundamentals & Skills $15.00 $0.01 4.5 This unique learning tool teaches anatomical concepts and illustrates all the structures and systems of the body through coloring exercises, an effective teaching device that also aids in the retention of the material. 163 detailed illustrations are organized according to body system and a color-key system links terminology to illustrations, reinforcing learning and impressing upon students the visual details of anatomy. 
Harpercollins Publisher 01-02-1997 06-02-2007 Lawrence Elson 142 -book 0076092021193 0321136497 us Servlets and JSP: The J2EE Web Tier Servlets and JSP: The J2EE Web Tier Paperback Web Site Design/ Internet/ Servlets/ Qualifying Textbooks - Winter 2007 $54.99 $11.99 3.5 Addison-Wesley Professional 29-08-2003 07-02-2007 Jayson Falkner/ Kevin R Jones 784 -book 9781932394443 1932394443 us IntelliJ IDEA in Action (In Action series) IntelliJ IDEA in Action (In Action series) Paperback/ Illustrated Software Development $44.95 $22.29 4.5 This book will help developers dig a little deeper into IDEA and embrace its streamlining features, which allow for more time to be spent on project design rather than code management. Without some educational investment, however, IDEA can be just another editor. That, then, is the purpose of this book: to not only get you up and running quickly, but to teach you how to use IDEA's powerful software development tools to their fullest advantage. Important product features, including the debugger, source code control, and the many code generation tools, are carefully explained and accompanied by tips and tricks that will leave even experienced IDEA users with "Eureka!" moments of informed programming. Coders just graduating from NOTEPAD and Java IDE veterans alike will profit from the powerful and timesaving expertise provided in this essential programmer's resource./ IDEA is a next-generation Integrated Development Environment (IDE) for Java. As the term IDE implies, IDEA integrates or combines all of the tools needed to develop Java software into a single application and interface. In other words, IDEA is a tool that helps develop Java applications more quickly, easily, and intelligently. IDEA can help with every phase of a project, from design and development to testing and deployment. This book is based on the IntelliJ IDEA Java development environment software from JetBrains, version 5.0./ Manning Publications 01-03-2006 07-02-2007 Duane K. Fields/ Stephen Saunders/ Eugene Belayev 450 -book 0636920924876 1565924878 us Java in a Nutshell : A Desktop Quick Reference (Java Series) Java in a Nutshell : A Desktop Quick Reference (Java Series) (3rd Edition) 3rd Paperback Reference/ Networking/ Object-Oriented Design/ Nutshell/ Java $29.95 $0.01 4 The 3rd edition of the well-known reference, Java in a Nutshell, covers the essential APIs of Java 1.2, including networking, security, input and output, and basic language and utility classes. Due to the size of the Java 1.2 API, graphics and graphical user interface classes are now examined in a volume called Java Foundation Classes in a Nutshell, and server-side and enterprise programming are detailed in Java Enterprise in a Nutshell./ Though primarily a reference, the book starts off with a thorough, fast-paced introduction to Java, exploring all the key topics, including syntax, object-oriented programming, security, beans, and tools. These discussions are brief and very information-dense, and if you are buying this book to learn the language, you will probably be overwhelmed by the speed of this initiation./ This book intends to document quite a bit of Java, and it easily succeeds with broad coverage of Java programming in Part I, and API cataloging broken down by package in Part II. For example, discussions in Part I explain Types, Reflection, and Dynamic Loading. The handling of these topics takes a little over a page, but the book gives a useful overview with code examples that clearly illustrate the points made. 
It is one of the clearest and most concise treatments of these three topics available./ The chapters in Part II include an introduction, diagrams, and sections for each class in the package. The sections for each class can be very informative, as in the discussion of the Socket class in the java.net chapter, which covers how to instantiate a Socket object, how to get I//O streams from the object you instantiated, and how to alter the behavior of sockets. This discussion, like most in this book, is brief, clear, and to the point./ If you are looking for a Java reference, this is a solid volume that will provide lasting value. --John Keogh/ Topics covered: Part I, "Introducing Java," provides broad coverage of Java programming topics, including data types, syntax, classes, and objects; Java file structure; inner classes; interfaces; packages; creating and initializing objects; destroying and finalizing objects; input//output; cryptography; networking; security; threads; JavaBeans; JavaDoc; and tools that come with Java 1.2 SDK./ Part II, "The Java API Quick Reference," includes chapters on the following Java packages: java.beans, java.beans.beancontext, java.io, java.lang, java.lang.ref, java.lang.reflect, java.math, java.net, java.security, java.security.acl, java.security.cert, java.security.interfaces, java.security.spec, java.text, java.util, java.util.jar, java.util.zip, javax.crypto, javax.crypto.interfaces, javax.crypto.spec, and a final chapter, which provides an index for classes, methods, and fields./ O'Reilly 11-02-1999 07-02-2007 David Flanagan 666 -book 9780596000486 0596000480 us JavaScript: The Definitive Guide JavaScript: The Definitive Guide Paperback/ Illustrated Web Site Design/ JavaScript/ Object-Oriented Design/ HTML - General/ Utilities/ Web Programming/ Qualifying Textbooks - Winter 2007 $44.95 $7.98 4.5 Since the earliest days of Internet scripting, Web developers have considered JavaScript: The Definitive Guide an essential resource. David Flanagan's approach, which combines tutorials and examples with easy-to-use syntax guides and object references, suits the typical programmer's requirements nicely. The brand-new fourth edition of Flanagan's "Rhino Book" includes coverage of JavaScript 1.5, JScript 5.5, ECMAScript 3, and the Document Object Model (DOM) Level 2 standard from the World Wide Web Consortium (W3C). Interestingly, the author has shifted away from specifying--as he did in earlier editions--what browsers support each bit of the language. Rather than say Netscape 3.0 supports the Image object while Internet Explorer 3.0 does not, he specifies that JavaScript 1.1 and JScript 3.0 support Image. More usefully, he specifies the contents of independent standards like ECMAScript, which encourages scripters to write applications for these standards and browser vendors to support them. As Flanagan says, JavaScript and its related subjects are very complex in their pure forms. It's impossible to keep track of the differences among half a dozen vendors' generally similar implementations. Nonetheless, a lot of examples make reference to specific browsers' capabilities./ Though he does not cover server-side APIs, Flanagan has chosen to separate coverage of core JavaScript (all the keywords, general syntax, and utility objects like Array) from coverage of client-side JavaScript (which includes objects, like History and Event, that have to do with Web browsers and users' interactions with them). 
This approach makes this book useful to people using JavaScript for applications other than Web pages. By the way, the other classic JavaScript text--Danny Goodman's JavaScript Bible--isn't as current as this book, but it's still a fantastic (and perhaps somewhat more novice-friendly) guide to the JavaScript language and its capabilities. --David Wall/ Topics covered: The JavaScript language (version 1.0 through version 1.5) and its relatives, JScript and ECMAScript, as well as the W3C DOM standards they're often used to manipulate. Tutorial sections show how to program in JavaScript, while reference sections summarize syntax and options while providing copious code examples./ O'Reilly Media 15-12-2001 07-02-2007 David Flanagan 900 -book 9780596007386 0596007388 us Java 5.0 Tiger: A Developer's Notebook Java 5.0 Tiger: A Developer's Notebook Paperback/ Illustrated Perl/ Java $29.95 $3.97 4.5 Java 5.0, code-named "Tiger", promises to be the most significant new version of Java since the introduction of the language. With over a hundred substantial changes to the core language, as well as numerous library and API additions, developers have a variety of new features, facilities, and techniques available. But with so many changes, where do you start? You could read through the lengthy, often boring language specification; you could wait for the latest 500 page tome on concepts and theory; you could even play around with the new JDK, hoping you figure things out--or you can get straight to work with Java 5.0 Tiger: A Developer's Notebook. This no-nonsense, down-and-dirty guide by bestselling Java authors Brett McLaughlin and David Flanagan skips all the boring prose and lecture, and jumps right into Tiger. You'll have a handle on the important new features of the language by the end of the first chapter, and be neck-deep in code before you hit the halfway point. Using the task-oriented format of this new series, you'll get complete practical coverage of generics, learn how boxing and unboxing affects your type conversions, understand the power of varargs, learn how to write enumerated types and annotations, master Java's new formatting methods and the for/in loop, and even get a grip on concurrency in the JVM. Light on theory and long on practical application, Java 5.0 Tiger: A Developer's Notebook allows you to cut to the chase, getting straight to work with Tiger's new features. The new Developer's Notebooks series from O'Reilly covers important new tools for software developers. Emphasizing example over explanation and practice over theory, they focus on learning by doing--you'll get the goods straight from the masters, in an informal and code-intensive style that suits developers. If you've been curious about Tiger, but haven't known where to start, this no-fluff, lab-style guide is the solution. O'Reilly Media 25-06-2004 07-02-2007 David Flanagan/ Brett McLaughlin 177 -book 9780936185514 0936185511 us Imperial Secrets of Health and Longevity Imperial Secrets of Health and Longevity Paperback New Age/ History/ Tai Chi & Qi Gong $12.95 $8.50 3.5 The 14 secrets of longevity of the Qing Dynasty Emperor, Qian Long, cover all aspects of living a long and healthy life. This book offers Qian Long's sage advice on the role of diet, exercise, relaxation, emotions, sex, and environment in achieving long life and good health. This traditional Chinese medical theory includes self-massage, stretching, and qi gong exercise as well as how to use Chinese tonic herbs.
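The Tiger features that the Developer's Notebook blurb lists (generics, boxing and unboxing, varargs, the for/in loop) fit into one small sketch; a minimal illustration, not drawn from the book itself:

    import java.util.Arrays;
    import java.util.List;

    public class TigerSketch {
        // Varargs: callers may pass any number of ints
        static int sum(int... values) {
            int total = 0;
            for (int v : values) total += v; // the new for/in loop
            return total;
        }

        public static void main(String[] args) {
            // Generics: the list is typed, so no casts are needed;
            // the int literals are autoboxed to Integer
            List<Integer> numbers = Arrays.asList(1, 2, 3);
            int total = 0;
            for (int n : numbers) total += n; // unboxing Integer -> int
            System.out.println(total + " " + sum(4, 5, 6)); // prints "6 15"
        }
    }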
Blue Poppy Press 03-02-1999 06-02-2007 Bob Flaws 113 -book 9780936185521 093618552X us Statements of Fact in Traditional Chinese Medicine Statements of Fact in Traditional Chinese Medicine Paperback Basic Science/ History $15.95 $33.04 4.5 At last, what every TCM student has been looking for: a linguistically accurate, succinct list of the key statements of fact in TCM, which, as a style of Chinese medicine, is largely a word game. However, to play the game, one needs to know the words. Hopefully, this book will help Western students gain both clarity and proficiency in the process and practice of doing TCM. When supplemented by a teacher, introductory grammar, and a dictionary, this book can quickly and efficiently help teach English language students and practitioners how to read medical Chinese and thus gain access to the vast library of Chinese medical literature. Blue Poppy Press 01-01-1994 06-02-2007 Bob Flaws 107 -book 9780936185927 0936185929 us The Tao of Healthy Eating The Tao of Healthy Eating Paperback Healthy/ Chinese Medicine $15.95 $8.21 4 Chinese dietary therapy is one of the most important aspects of Chinese medicine. The Tao of Healthy Eating illuminates the theory and practice of Chinese dietary therapy with emphasis on the concerns and attitudes of Westerners. Commonsense metaphors explain basic Chinese medical theories and their application in preventive and remedial dietary therapy. It features a clear description of the Chinese medical understanding of digestion and all the practical implications of this for day-to-day diet. Issues of Western interest are discussed, such as raw versus cooked foods, high cholesterol, food allergies, and candidiasis. It includes the Chinese medical descriptions of 200 Western foods and similar information on vitamins, minerals, and amino acids. Blue Poppy Press 01-01-1998 06-02-2007 Bob Flaws 128 -book 9781932394184 1932394184 us Java Reflection in Action (In Action series) Java Reflection in Action (In Action series) Paperback/ Illustrated $44.95 $17.99 4.5 Explaining the Java Reflection API and providing techniques for using it effectively, this guide describes the capabilities that allow a program to examine and modify itself at runtime. The java.lang.reflect package and its uses are covered, including a detailed discussion of Java's dynamic proxy facility. Less obvious reflective capabilities, such as call stack introspection and the Java class loader, are addressed. In recognition of the limitations of Java Reflection, the various ways to use Reflection to generate code and surpass these limitations are detailed. A discussion of performance analysis techniques and a look ahead at what is new in JDK 1.5 is included./ Manning Publications 10-02-2004 07-02-2007 Ira R. Forman/ Nate Forman 300 -book 9780977616602 0977616606 us Rails Recipes (Pragmatic Programmers) Rails Recipes (Pragmatic Programmers) Paperback Object-Oriented Design/ Internet $32.95 $22.46 4.5 Rails is large, powerful, and new. How do you use it effectively? How do you harness the power?
And, most important, how do you get high quality, real-world applications written?/ From the latest Ajax effects to time-saving automation tips for your development process, Rails Recipes will show you how the experts have already solved the problems you have./ • Use generators to automate repetitive coding tasks./ • Create sophisticated role-based authentication schemes./ • Add live search and live preview to your site./ • Run tests when anyone checks code in./ • Create tagged data the right way./ • and many, many more.../ Owning Rails Recipes is like having the best Rails programmers sitting next to you while you code./ Pragmatic Bookshelf 01-06-2006 01-02-2007 3 Chad Fowler 332 -book 0785342485677 0201485672 us Refactoring: Improving the Design of Existing Code Refactoring: Improving the Design of Existing Code Hardcover Object-Oriented Design/ Design Tools & Techniques/ Qualifying Textbooks - Winter 2007 $59.99 $39.99 4.5 Your class library works, but could it be better? Refactoring: Improving the Design of Existing Code shows how refactoring can make object-oriented code simpler and easier to maintain. Today refactoring requires considerable design know-how, but once tools become available, all programmers should be able to improve their code using refactoring techniques./ Besides an introduction to refactoring, this handbook provides a catalog of dozens of tips for improving code. The best thing about Refactoring is its remarkably clear presentation, along with excellent nuts-and-bolts advice, from object expert Martin Fowler. The author is also an authority on software patterns and UML, and this experience helps make this a better book, one that should be immediately accessible to any intermediate or advanced object-oriented developer. (Just like patterns, each refactoring tip is presented with a simple name, a "motivation," and examples using Java and UML.)/ Early chapters stress the importance of testing in successful refactoring. (When you improve code, you have to test to verify that it still works.) After the discussion on how to detect the "smell" of bad code, readers get to the heart of the book, its catalog of over 70 "refactorings"--tips for better and simpler class design. Each tip is illustrated with "before" and "after" code, along with an explanation. Later chapters provide a quick look at refactoring research./ Like software patterns, refactoring may be an idea whose time has come. This groundbreaking title will surely help bring refactoring to the programming mainstream. With its clear advice on a hot new topic, Refactoring is sure to be essential reading for anyone who writes or maintains object-oriented software. --Richard Dragan/ Topics Covered: Refactoring, improving software code, redesign, design tips, patterns, unit testing, refactoring research, and tools./ Addison-Wesley Professional 28-06-1999 07-02-2007 Martin Fowler/ Kent Beck/ John Brant/ William Opdyke/ Don Roberts 464 -book 0785342657838 020165783X us UML Distilled: A Brief Guide to the Standard Object Modeling Language UML Distilled: A Brief Guide to the Standard Object Modeling Language (2nd Edition) 2nd Paperback Object-Oriented Design/ Software Development/ UML $34.99 $2.95 4 The second edition of Martin Fowler's bestselling UML Distilled provides updates to the Unified Modeling Language (UML) without changing its basic formula for success.
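The "before" and "after" format that the Refactoring blurb above describes looks roughly like this; a small sketch in the spirit of the book's Extract Method refactoring (the Invoice example is illustrative, not quoted from the catalog):

    // Before: banner-printing detail is tangled into the method
    class Invoice {
        double amount;
        void printOwing() {
            System.out.println("***********");
            System.out.println("* Owing   *");
            System.out.println("***********");
            System.out.println("amount: " + amount);
        }
    }

    // After Extract Method: the detail now has a name of its own
    class InvoiceRefactored {
        double amount;
        void printOwing() {
            printBanner();
            System.out.println("amount: " + amount);
        }
        private void printBanner() {
            System.out.println("***********");
            System.out.println("* Owing   *");
            System.out.println("***********");
        }
    }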
It is still arguably the best resource for quick, no-nonsense explanations of using UML./ The major strength of UML Distilled is its short, concise presentation of the essentials of UML and where it fits within today's software development process. The book describes all the major UML diagram types, what they're for, and the basic notation involved in creating and deciphering them. These diagrams include use cases; class and interaction diagrams; collaborations; and state, activity, and physical diagrams. The examples are always clear, and the explanations cut to the fundamental design logic./ For the second edition, the material has been reworked for use cases and activity diagrams, plus there are numerous small tweaks throughout, including the latest UML v. 1.3 standard. An appendix even traces the evolution of UML versions./ Working developers often don't have time to keep up with new innovations in software engineering. This new edition lets you get acquainted with some of the best thinking about efficient object-oriented software design using UML in a convenient format that will be essential to anyone who designs software professionally. --Richard Dragan/ Topics covered: UML basics, analysis and design, outline development (software development process), inception, elaboration, managing risks, construction, transition, use case diagrams, class diagrams, interaction diagrams, collaborations, state diagrams, activity diagrams, physical diagrams, patterns, and refactoring basics./ Addison-Wesley Professional 25-08-1999 07-02-2007 Martin Fowler/ Kendall Scott 185 -book 0752063320839 0672320835 us The Ruby Way The Ruby Way Paperback Object-Oriented Design/ Qualifying Textbooks - Winter 2007 $39.99 $13.69 4.5 The Ruby Way assumes that the reader is already familiar with the subject matter. Using many code samples it focuses on "how-to use Ruby" for specific applications, either as a stand-alone language, or in conjunction with other languages./ Topics covered include:/ • Simple data tasks;/ • Manipulating structured data;/ • External data manipulation;/ • User interfaces;/ • Handling threads;/ • System programming;/ • Network and web programming;/ • Tools and utilities./ Note: The appendices offer instruction on migrating from Perl and Python to Ruby, and extending Ruby in C and C++./ Sams 17-12-2001 07-02-2007 Hal Fulton 600 -book 0785342633610 0201633612 us Design Patterns: Elements of Reusable Object-Oriented Software (Addison-Wesley Professional Computing Series) Design Patterns: Elements of Reusable Object-Oriented Software (Addison-Wesley Professional Computing Series) Hardcover Object-Oriented Design/ Software Development/ Software Reuse/ HTML - General/ Gangs/ Design Tools & Techniques/ Object-Oriented Software Design/ Qualifying Textbooks - Winter 2007 $54.99 $30.99 4.5 Design Patterns is a modern classic in the literature of object-oriented development, offering timeless and elegant solutions to common problems in software design. It describes patterns for managing object creation, composing objects into larger structures, and coordinating control flow between objects. The book provides numerous examples where using composition rather than inheritance can improve the reusability and flexibility of code. 
Note, though, that it's not a tutorial but a catalog that you can use to find an object-oriented design pattern that's appropriate for the needs of your particular application--a selection for virtuoso programmers who appreciate (or require) consistent, well-engineered object-oriented designs. Addison-Wesley Professional 15-01-1995 07-02-2007 Erich Gamma/ Richard Helm/ Ralph Johnson/ John Vlissides 395 -book B0006AXOU2 us Gravity: [classic and modern views] (Science study series) Gravity: [classic and modern views] (Science study series) Unknown Binding Astrophysics & Space Science $0.99 Anchor Books 08-02-1962 08-02-2007 George Gamow 157 -book 9781422103296 1422103293 us Changing Minds: The Art And Science of Changing Our Own And Other People's Minds (Leadership for the Common Good) Changing Minds: The Art And Science of Changing Our Own And Other People's Minds (Leadership for the Common Good) Paperback Leadership/ Management/ Motivational/ Applied Psychology/ Cognitive $14.95 $5.99 3 Think about the last time you tried to change someone’s mind about something important: a voter’s political beliefs; a customer’s favorite brand; a spouse’s decorating taste. Chances are you weren’t successful in shifting that person’s beliefs in any way. In his book, Changing Minds, Harvard psychologist Howard Gardner explains what happens during the course of changing a mind – and offers ways to influence that process./ Remember that we don’t change our minds overnight; it happens in gradual stages that can be powerfully influenced along the way. This book provides insights that can broaden our horizons and shape our lives./ Harvard Business School Press 30-09-2006 08-02-2007 Howard Gardner 244 -book 9780596007331 0596007337 us We the Media We the Media Hardcover Culture/ Government/ Internet Publishing/ Journalism/ Media Studies/ Weblogs/ Technology & Society $24.95 $3.49 4.5 Grassroots journalists are dismantling Big Media's monopoly on the news, transforming it from a lecture to a conversation. Not content to accept the news as reported, these readers-turned-reporters are publishing in real time to a worldwide audience via the Internet. The impact of their work is just beginning to be felt by professional journalists and the newsmakers they cover. In We the Media: Grassroots Journalism by the People, for the People, nationally known business and technology columnist Dan Gillmor tells the story of this emerging phenomenon, and sheds light on this deep shift in how we make and consume the news. We the Media is essential reading for all participants in the news cycle:/ • Consumers learn how they can become producers of the news. Gillmor lays out the tools of the grassroots journalist's trade, including personal Web journals (called weblogs or blogs), Internet chat groups, email, and cell phones. He also illustrates how, in this age of media consolidation and diminished reporting, to roll your own news, drawing from the array of sources available online and even over the phone./ • Newsmakers--politicians, business executives, celebrities--get a wake-up call. The control that newsmakers enjoyed in the top-down world of Big Media is seriously undermined in the Internet Age. Gillmor shows newsmakers how to successfully play by the new rules and shift from control to engagement./ • Journalists discover that the new grassroots journalism presents opportunity as well as challenge to their profession.
One of the first mainstream journalists to have a blog, Gillmor says, "My readers know more than I do, and that's a good thing." In We the Media, he makes the case to his colleagues that, in the face of a plethora of Internet-fueled news vehicles, they must change or become irrelevant./ At its core, We the Media is a book about people. People like Glenn Reynolds, a law professor whose blog postings on the intersection of technology and liberty garnered him enough readers and influence that he became a source for professional journalists. Or Ben Chandler, whose upset Congressional victory was fueled by contributions that came in response to ads on a handful of political blogs. Or Iraqi blogger Zayed, whose Healing Iraq blog (healingiraq.blogspot.com) scooped Big Media. Or acridrabbit, who inspired an online community to become investigative reporters and discover that the dying Kaycee Nichols's sad tale was a hoax. Give the people tools to make the news, We the Media asserts, and they will. Journalism in the 21st century will be fundamentally different from the Big Media that prevails today. We the Media casts light on the future of journalism, and invites us all to be part of it. O'Reilly Media 08-02-2004 11-02-2007 Dan Gillmor 304 -book 9780894711350 0894711350 us Gray's Anatomy: The Unabridged Running Press Edition of the American Classic Gray's Anatomy: The Unabridged Running Press Edition of the American Classic Hardcover Bargain Books/ Reference/ Family Health/ Anatomy/ Surgery/ Bargain Books Outlet $18.98 $0.34 3.5 The leg bone's connected to the hip bone, and so on. For many of us, anatomy can seem intimidating and unrewarding, but the right teacher can clear such feelings away in a heartbeat. Our fascination with our bodies is a powerful force, and once we start looking, we find that beauty is much more than skin-deep./ It so happens that the right teacher can take the form of a book. Gray's Anatomy is one of those few titles that practically everybody has heard of, and with good reason--it is a scientific and artistic triumph. Not just a dry index of parts and names, Gray's lets the natural beauty and grace of the body's interconnected systems and structures shine forth from the page. Using sumptuous illustrations and clear, matter-of-fact descriptions, Dr. Gray unleashed a classic on the world more than 100 years ago. Its clarity and usefulness keep it in print today. Whether you want to understand yourself or others, knowledge of our physical parts and how they fit together is essential. Gray's Anatomy provides that information in a simple, timeless format that cleanly dissects a body of knowledge grown over centuries. This book will not only fill the needs of people in the medical profession, but will please artists and naturalists as well. --Rob Lightner/ Running Press Book Publishers 11-02-2007 Henry F. R. S. Gray/ T. Pickering Pick 1248 -book 9780976694076 0976694077 us Best of Ruby Quiz Volume One (Pragmatic Programmers) Best of Ruby Quiz Volume One (Pragmatic Programmers) Paperback/ Illustrated Object-Oriented Design/ Software Development $29.95 $13.71 4.5 Solve these twenty-five popular programming puzzles, and sharpen your programming skills as you craft solutions./ You'll find interesting and challenging programming puzzles including:/ • 800 Numbers/ • Crosswords/ • Cryptograms/ • Knight's Tour/ • Paper, Rock, Scissors/ • Tic-Tac-Toe/ • Texas Hold-Em/ • ...and more./ / Learning to program can be quite a challenge.
Classes and books can get you so far, but at some point you have to sit down and start playing with some code. Only by reading and writing real code, with real problems, can you learn./ / The Ruby Quiz was built to fill exactly this need for Ruby programmers. Challenges, solutions, and discussions combine to make Ruby Quiz a powerful way to learn Ruby tricks. See how algorithms translate to Ruby code, get exposure to Ruby's libraries, and learn how other programmers use Ruby to solve problems quickly and efficiently./ Pragmatic Bookshelf 01-03-2006 08-02-2007 James Edward Gray 298 -book 9780312064945 0312064942 us The Alexander Technique: A Complete Course in How to Hold and Use Your Body for Maximum Energy The Alexander Technique: A Complete Course in How to Hold and Use Your Body for Maximum Energy Paperback Stress Management/ Psychology & Counseling/ Physical Therapy/ Pharmacology/ Exercise/ Alexander Technique/ Injuries & Rehabilitation $15.95 $5.00 3.5 The Alexander Technique is a proven process of mind and body reeducation that reduces stress and muscle tension, and revitalizes those who practice it. Used by many actors, athletes, and dancers, the technique can help anyone increase his or her energy and achieve a more dynamic presence./ / Written by a veteran instructor of the Alexander Technique, this authentic and easy-to-follow guide allows everyone to learn the increasingly popular program, with clear instructions for each exercise, and dozens of helpful photographs that show correct and incorrect positions to use for the exercises and throughout the day./ St. Martin's Griffin 15-11-1991 11-02-2007 John Gray 176 -book 9781556432149 1556432143 us Planet Medicine: Modalities Planet Medicine: Modalities Paperback Holistic/ Holistic Medicine/ Pharmacology/ History/ Philosophy of Medicine $25.00 $7.43 Planet Medicine is a major work by an anthropologist who looks at medicine in a broad context. In this edition, additions to this classic text include a section on Reiki, a comparison of types of palpation used in healing, updates on craniosacral therapy, and a means of understanding how different alternative medicines actually work. Illustrated throughout, this is the standard on the history, philosophy, and anthropology of this subject. North Atlantic Books 11-02-1995 11-02-2007 Richard Grossinger/ Spain Rodriguez/ Alex Grey 602 -book 9781590590997 1590590996 us Logging in Java with the JDK 1.4 Logging API and Apache log4j Logging in Java with the JDK 1.4 Logging API and Apache log4j Hardcover Software Development $49.99 $2.91 3 Logging in Java with the JDK 1.4 Logging API and Apache log4j is the first book to discuss the two foremost logging APIs: JDK 1.4.0 logging API and Apache log4j 1.2.6 logging API for application developers. The internals of each API are examined, contrasted, and compared in exhaustive depth. Programmers will find a wealth of information simply not available elsewhere--not even on the Internet./ Each concept explained is accompanied by code examples written in the Java language. The book also provides guidelines for extending the existing logging frameworks to cater to application-specific needs.
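The JDK 1.4 half of the comparison that logging title draws looks roughly like this; a minimal java.util.logging sketch (the log4j equivalent would swap in log4j's own Logger class and its warn() call):

    import java.util.logging.Level;
    import java.util.logging.Logger;

    public class LoggingSketch {
        // One logger per class is the usual convention in both APIs
        private static final Logger LOG =
                Logger.getLogger(LoggingSketch.class.getName());

        public static void main(String[] args) {
            LOG.info("application starting");
            try {
                Integer.parseInt("not a number");
            } catch (NumberFormatException e) {
                // Attach the exception to the record at a chosen severity level
                LOG.log(Level.WARNING, "could not parse input", e);
            }
        }
    }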
This is an essential handbook for logging-related information and techniques needed for developing applications in the Java language./ Apress 15-04-2003 07-02-2007 Samudra Gupta 336 -book 0785342753066 0201753065 us Component Development for the Java Platform Component Development for the Java Platform Paperback Software Design/ Qualifying Textbooks - Winter 2007 $39.99 $4.41 4.5 Addison-Wesley Professional 15-12-2001 07-02-2007 1 Stuart Dabbs Halloway 304 -book 0636920002925 0596002920 us XML in a Nutshell, 2nd Edition XML in a Nutshell, 2nd Edition Paperback HTML - General/ XML/ Nutshell $39.95 $0.46 4 Continuing in the tradition of the Nutshell series, XML in a Nutshell provides a dense tutorial on its subject, as well as a useful day-to-day reference. While the reader isn't expected to have prior expertise in XML, this book is most effective as an add-on to a more introductory tutorial because of its relatively fast pace./ The authors set out to systematically--and rapidly--cover the basics of XML first, namely the history of the markup language and the various languages and technologies that compose the standard. In this first section, they discuss the basics of XML markup, Document Type Definitions (DTDs), namespaces, and Unicode. From there, the authors move into "narrative-centric documents" in a section that appropriately focuses on the application of XML to books, articles, Web pages and other readable content./ This book definitely presupposes in the reader an aptitude for picking up concepts quickly and for rapidly building cumulative knowledge. Code examples are used--only to illustrate the particular point in question--but not in excess. The book gets into "data-centric" XML, exploring the difference between the object-driven Document Object Model (DOM) and the event-driven Simple API for XML (SAX). However, these areas are a little underpowered and offer a bit less detail about this key area than the reader will expect./ At the core of any Nutshell book is the reference section, and the installment found inside this text is no exception. Here, the XML 1.0 standard, XPath, XSLT, DOM, SAX, and character sets are covered. Some material that is covered earlier in the book--such as Cascading Style Sheets (CSS)--is not re-articulated, however. XML in a Nutshell is not the only book on XML you should have, but it is definitely one that no XML coder should be without. --Stephen W. Plain/ Topics covered:/ • XML history/ • Document Type Definitions (DTDs)/ • Namespaces/ • Internationalization/ • XML-based data formats/ • XHTML/ • XSL/ • XPath/ • XLink/ • XPointer/ • Cascading Style Sheets (CSS)/ • XSL-FO/ • Document Object Model (DOM)/ • Simple API for XML (SAX)/ O'Reilly 15-06-2002 07-02-2007 Elliotte Rusty Harold/ W. Scott Means 640 -book 9781932394283 1932394281 us Lucene in Action (In Action series) Lucene in Action (In Action series) Paperback/ Illustrated $44.95 $31.14 4.5 Lucene is a gem in the open-source world--a highly scalable, fast search engine. It delivers performance and is disarmingly easy to use. Lucene in Action is the authoritative guide to Lucene. It describes how to index your data, including types you definitely need to know such as MS Word, PDF, HTML, and XML. It introduces you to searching, sorting, filtering, and highlighting search results./ / Lucene powers search in surprising places--in discussion groups at Fortune 100 companies, in commercial issue trackers, in email search from Microsoft, in the Nutch web search engine (that scales to billions of pages).
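To give a flavor of the indexing and searching the Lucene in Action blurb describes, a minimal sketch against the Lucene 3.x API of the tree this diff targets (the field name and query term are arbitrary):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class LuceneSketch {
        public static void main(String[] args) throws Exception {
            RAMDirectory dir = new RAMDirectory(); // in-memory index for the demo
            // Index one document with an analyzed "title" field
            IndexWriter writer = new IndexWriter(dir,
                    new StandardAnalyzer(Version.LUCENE_30), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            Document doc = new Document();
            doc.add(new Field("title", "Lucene in Action",
                    Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
            writer.close();
            // Search the index for the term "lucene"
            IndexSearcher searcher = new IndexSearcher(dir);
            TopDocs hits = searcher.search(
                    new TermQuery(new Term("title", "lucene")), 10);
            System.out.println(hits.totalHits + " hit(s)"); // expect 1
            searcher.close();
        }
    }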
It is used by diverse companies including Akamai, Overture, Technorati, HotJobs, Epiphany, FedEx, Mayo Clinic, MIT, New Scientist Magazine, and many others. Adding search to your application can be easy. With many reusable examples and good advice on best practices, Lucene in Action shows you how./ / What's Inside/ - How to integrate Lucene into your applications/ - Ready-to-use framework for rich document handling/ - Case studies including Nutch, TheServerSide, jGuru, etc./ - Lucene ports to Perl, Python, C#/.Net, and C++/ - Sorting, filtering, term vectors, multiple and remote index searching/ - The new SpanQuery family, extending query parser, hit collecting/ - Performance testing and tuning/ - Lucene add-ons (hit highlighting, synonym lookup, and others)/ Manning Publications 28-12-2004 07-02-2007 Erik Hatcher/ Otis Gospodnetic 456 -book 9781930110588 1930110588 us Java Development with Ant Java Development with Ant Paperback/ Illustrated $44.95 $24.67 4.5 Encompassing Java-centric software project best practices for designing and automating build, test, and deployment processes using Ant, this book is written for developers using Java in large software projects and those who have reached the limits of classic IDE development systems. Benefiting developers who apply extreme programming methodology to Java projects, this resource provides detailed coverage of Ant and explains how to use it in large projects and extend it when needed. In addition to using Ant for Java applications, it includes discussions of servlets and J2EE applications, which cover the majority of Java development projects./ Manning Publications 08-02-2002 07-02-2007 Erik Hatcher/ Steve Loughran 672 -book 9781930110977 1930110979 us Code Generation in Action Code Generation in Action Paperback Software Design/ Software Development/ Systems Analysis & Design/ Coding Theory $44.95 $9.95 4.5 Developers using code generation are producing higher quality code faster than their hand-coding counterparts. And, they enjoy other advantages like maintainability, consistency and abstraction. Using the new CG methods they can make a change in one place, avoiding multiple synchronized changes you must make by hand./ Code Generation in Action shows you the techniques of building and using programs to write other programs. It shows how to avoid repetition and error to produce consistent, high quality code, and how to maintain it more easily. It demonstrates code generators for user interfaces, database access, remote procedure access, and much more./ Code Generation in Action is an A-to-Z guide covering building, buying, deploying and using code generators. If you are a software engineer--whether beginner or advanced--eager to become the "ideas person," the mover-and-shaker on your development team, you should learn CG techniques. This book will help you master them./ What's Inside:/ • Code generation basics/ • CG techniques and best practices/ • Patterns of CG design/ • How to deploy generators/ • Many example generators/ Includes generators for:/ • Database access/ • RPC/ • Unit tests/ • Documentation/ • Business logic/ • Data translation/ Over his twenty years of development experience, Jack Herrington has shipped many software applications helped by code generation techniques. He runs the Code Generation Network.
Manning Publications 01-07-2003 07-02-2007 Jack Herrington 368 -book 9780140951448 014095144X us Tao of Pooh and Te of Piglet Boxed Set Tao of Pooh and Te of Piglet Boxed Set Paperback/ Box set History of Books/ 20th Century/ Taoism/ Entertainment/ Literature & Fiction/ Nonfiction/ Religion & Spirituality $27.00 $9.89 4 Is there such a thing as a Western Taoist? Benjamin Hoff says there is, and this Taoist's favorite food is honey. Through brilliant and witty dialogue with the beloved Pooh-bear and his companions, the author of this smash bestseller explains with ease and aplomb that rather than being a distant and mysterious concept, Taoism is as near and practical to us as our morning breakfast bowl. Romp through the enchanting world of Winnie-the-Pooh while soaking up invaluable lessons on simplicity and natural living. Penguin (Non-Classics) 01-11-1994 11-02-2007 Benjamin Hoff -book 9780140230161 0140230165 us The Te of Piglet The Te of Piglet Paperback History of Books/ British/ Taoism/ Mysticism/ 20th Century $14.00 $1.05 3 In The Te of Piglet, a good deal of Taoist wisdom is revealed through the character and actions of A. A. Milne's Piglet. Piglet herein demonstrates a very important principle of Taoism: The Te--a Chinese word meaning Virtue--of the Small. Penguin (Non-Classics) 01-11-1993 11-02-2007 Benjamin Hoff 272 -book 9780553345841 0553345842 us The Mind's I: Fantasies and Reflections on Self and Soul The Mind's I: Fantasies and Reflections on Self and Soul Paperback Consciousness & Thought/ Cognitive Psychology/ Cognitive Science $18.95 $1.43 4.5 Brilliant, shattering, mind-jolting, The Mind's I is a searching, probing book--a cosmic journey of the mind--that goes as deeply into the problem of self and self-consciousness as anything written in our time. From verbalizing chimpanzees to scientific speculations involving machines with souls, from the mesmerizing, maze-like fiction of Borges to the tantalizing, dreamlike fiction of Lem and Princess Ineffable, her circuits glowing red and gold, The Mind's I opens the mind to the Black Box of fantasy, to the windfalls of reflection, to new dimensions of exciting possibilities. Bantam 01-04-1985 08-02-2007 Douglas Hofstadter/ Daniel C. Dennett 512 -book 9780880225724 0880225726 us Unix Shell Commands Quick Reference (Que Quick Reference Series) Unix Shell Commands Quick Reference (Que Quick Reference Series) Paperback MacOS/ Shell/ Macintosh/ Macs $8.95 $0.77 4 Que Pub 10-02-1990 07-02-2007 William Holliker 154 -book 9780131855861 0131855867 us Spring Into HTML and CSS (Spring Into... Series) Spring Into HTML and CSS (Spring Into... Series) Paperback HTML - General/ Internet/ XHTML/ Qualifying Textbooks - Winter 2007 $29.99 $6.00 4 Addison-Wesley Professional 22-04-2005 07-02-2007 1 Molly E. Holzschlag 336 -book 9789629962166 9629962160 us Business Chinese Business Chinese Paperback English (All)/ Chinese/ Study & Teaching/ Qualifying Textbooks - Winter 2007 $33.00 $24.98 This book will help readers develop their competence in advanced Chinese in a business context. Rather than teaching language in isolation from substantive content, Business Chinese presents readers with both content and context. Exercises and tasks in the book require readers to integrate their language skills with their content knowledge.
To meet learners' practical communication needs, the book focuses on both oral and written language skills./ In order to keep readers abreast of the real business world, all texts and exercises are drawn from authentic materials from mainland China, Taiwan and Hong Kong. Business Chinese is the perfect, practical guide for those who want to master Chinese language and the Chinese business world./ Chinese University Press 20-07-2005 09-02-2007 Jiaying Howard/ Tsengtseng Chang 311 -book 0785342616224 020161622X us The Pragmatic Programmer: From Journeyman to Master The Pragmatic Programmer: From Journeyman to Master Paperback Qualifying Textbooks - Winter 2007 $45.99 $21.84 4.5 Programmers are craftspeople trained to use a certain set of tools (editors, object managers, version trackers) to generate a certain kind of product (programs) that will operate in some environment (operating systems on hardware assemblies). Like any other craft, computer programming has spawned a body of wisdom, most of which isn't taught at universities or in certification classes. Most programmers arrive at the so-called tricks of the trade over time, through independent experimentation. In The Pragmatic Programmer, Andrew Hunt and David Thomas codify many of the truths they've discovered during their respective careers as designers of software and writers of code./ Some of the authors' nuggets of pragmatism are concrete, and the path to their implementation is clear. They advise readers to learn one text editor, for example, and use it for everything. They also recommend the use of version-tracking software for even the smallest projects, and promote the merits of learning regular expression syntax and a text-manipulation language. Other (perhaps more valuable) advice is more light-hearted. In the debugging section, it is noted that, "if you see hoof prints, think horses, not zebras." That is, suspect everything, but start looking for problems in the most obvious places. There are recommendations for making estimates of time and expense, and for integrating testing into the development process. You'll want a copy of The Pragmatic Programmer for two reasons: it displays your own accumulated wisdom more cleanly than you ever bothered to state it, and it introduces you to methods of work that you may not yet have considered. Working programmers will enjoy this book. --David Wall/ Topics covered: A useful approach to software design and construction that allows for efficient, profitable development of high-quality products. Elements of the approach include specification development, customer relations, team management, design practices, development tools, and testing procedures. This approach is presented with the help of anecdotes and technical problems./ Addison-Wesley Professional 20-10-1999 07-02-2007 1 Andrew Hunt/ David Thomas 352 -book 9780974514017 0974514012 us Pragmatic Unit Testing in Java with JUnit Pragmatic Unit Testing in Java with JUnit Paperback/ Illustrated Software Development/ Testing/ Information Systems/ Information Theory $29.95 $16.46 4.5 Learn how to improve your Java coding skills using unit testing. Despite its name, unit testing is really a coding technique, not a testing technique. Unit testing is done by programmers, for programmers. It's primarily for our benefit: we get improved confidence in our code, better ability to make deadlines, less time spent in the debugger, and less time beating on the code to make it work correctly.
This book shows how to write tests, but more importantly, it goes where other books fear to tread and gives you concrete advice and examples of what to test--the common things that go wrong in all of our programs. Discover the tricky hiding places where bugs breed, and how to catch them using the freely available JUnit framework. It's easy to learn how to think of all the things in your code that are likely to break. We'll show you how with helpful mnemonics, summarized in a handy tip sheet (also available from our www.pragmaticprogrammer.com website) to help you remember all this stuff. With this book you will:/ • Write better code, and take less time to write it/ • Discover the tricky places where bugs breed/ • Learn how to think of all the things that could go wrong/ • Test individual pieces of code without having to include the whole project/ • Test effectively with the whole team/ We'll also cover how to use Mock Objects for testing, how to write high quality test code, and how to use unit testing to improve your design skills. We'll show you frequent "gotchas"--along with the fixes--to save you time when problems come up. But the best part is that you don't need a sweeping mandate to change your whole team or your whole company. You don't need to adopt Extreme Programming or Test-Driven Development, or change your development process in order to reap the proven benefits of unit testing. You can start unit testing, the pragmatic way, right away. The Pragmatic Programmers 09-02-2003 07-02-2007 Andrew Hunt/ David Thomas 159 -book 9780596000400 0596000405 us Java Servlet Programming, 2nd Edition Java Servlet Programming, 2nd Edition Paperback/ Illustrated Web Site Design/ Java/ Web Programming/ Servlets $44.95 $7.90 4 Aimed at Web developers with some previous Java experience, Java Servlet Programming, Second Edition, offers a solid introduction to the world of Java development with Servlets and related technologies. Thoroughly revised and newly updated with over a half-dozen new chapters, this title brings an already useful text up to speed with some leading-edge material. It excels particularly in explaining how to program dynamic Web content using Java Servlets, with a fine introduction to all the APIs, programming techniques, and tips you will need to be successful with this standard./ Besides a useful guide to APIs, the book looks at a variety of techniques for saving session state, as well as showing how Servlets can work together to power Web sites. You will learn performance tips and ways to get Servlets to work together (like forwarding and redirection), plus the basics of database programming with JDBC, to build content with "live" data. A later chapter examines what's next for Servlets with the emerging Servlet 2.3 API standard. Importantly, the authors go over deploying and configuring Web applications by editing XML files, a must-have for successfully running Servlets in real applications./ Since the first edition of this title, the choices for Java Web developers have grown much richer. Many of the new chapters in this edition look at options beyond Servlets.
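The dynamic Web content programming that the Servlet title above teaches starts from a shape like this; a minimal HttpServlet sketch against the standard javax.servlet API (the parameter name and markup are placeholders):

    import java.io.IOException;
    import java.io.PrintWriter;
    import javax.servlet.ServletException;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;

    // One servlet instance services many requests; doGet builds the response
    public class HelloServlet extends HttpServlet {
        protected void doGet(HttpServletRequest req, HttpServletResponse resp)
                throws ServletException, IOException {
            resp.setContentType("text/html");
            PrintWriter out = resp.getWriter();
            out.println("<html><body>Hello, "
                    + req.getParameter("name") + "</body></html>");
        }
    }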
Short sections on application frameworks such as Tea, WebMacro, the Element Construction Set (ECS), XMLC, and JavaServer Pages (JSP) let you explore what's out there for Java developers today with a survey of some current tools that can speed up creating new Web applications./ The text closes with reference sections on Servlet APIs (and other material) that will be useful for any working developer. Although Servlets are not the only game in town, they are still important tools for successful Web development. This updated edition shows you just how to do it with plenty of basic and advanced tips for taking full advantage of this powerful Java standard. --Richard Dragan/ Topics covered:/ • Overview and history of Java Servlets/ • Fundamentals of HTTP/ • Web applications (including deployment and configuration using XML files)/ • The Servlet lifecycle (initializing, processing requests, cleanup, and caching)/ • Multimedia content (images and compressed content)/ • WAP and WML for wireless content/ • Servlet session tracking techniques (hidden form fields, cookies, and URL rewriting)/ • Security issues with Servlets (including certificates and SSL)/ • Tutorial for JDBC and Java database programming/ • Using applets and Servlets together/ • Servlet collaboration/ • Quick introduction to Java 2 Enterprise Edition (J2EE)/ • Internationalization issues/ • Survey of third-party Servlet application frameworks and tools: Tea, WebMacro, the Element Construction Set (ECS), XMLC, and JavaServer Pages (JSP)/ • Miscellaneous tips for Servlets (including sending e-mail and using regular expressions)/ • Description of the new Servlet 2.3 API spec/ • Servlet API quick reference/ O'Reilly Media 15-01-2001 07-02-2007 Jason Hunter 753 -book 9781930110991 1930110995 us JUnit in Action JUnit in Action Paperback/ Illustrated Object-Oriented Design/ Software Design/ Testing/ Systems Analysis & Design $39.95 $21.83 4.5 A guide to unit testing Java applications (including J2EE applications) using the JUnit framework and its extensions, this book provides techniques for solving real-world problems such as unit testing legacy applications, writing real tests for real objects, automating tests, testing in isolation, and unit testing J2EE and database applications. Using a sample-driven approach, various unit testing strategies are covered, such as how to unit test EJBs, database applications, JSPs, and Taglibs. Also addressed are testing strategies using freely available open source frameworks and tools, and how to unit test in isolation with Mock Objects. Testing J2EE applications by running tests from inside the container for performing integration unit tests is discussed, as is how to automate unit testing in automated builds (such as Ant and Maven) for performing continuous integration./ Manning Publications 28-10-2003 07-02-2007 Ted Husted/ Vincent Massol 384 -book 9781932394498 1932394494 us RSS and Atom in Action: Web 2.0 Building Blocks RSS and Atom in Action: Web 2.0 Building Blocks Paperback/ Illustrated Web Site Design/ Internet $39.95 $19.98 4.5 RSS and Atom in Action is organized into two parts. The first part introduces the blog technologies of news feed formats and publishing protocols--the building blocks. The second part shows how to put those blocks together to assemble interesting and useful blog applications.
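The unit-testing titles above all build on the same small pattern; a minimal JUnit 3.x-style test case, using java.util.Stack as a stand-in class under test (a JUnit test runner finds the public testXxx methods automatically):

    import junit.framework.TestCase;

    public class StackTest extends TestCase {
        private java.util.Stack<String> stack;

        protected void setUp() {               // runs before each test method
            stack = new java.util.Stack<String>();
        }

        public void testPushThenPop() {
            stack.push("solr");
            assertEquals("solr", stack.pop()); // inherited assertion methods
            assertTrue(stack.isEmpty());
        }
    }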
In keeping with the spirit behind Manning's "In Action" series, this book shows the reader, through numerous examples in Java and C#, how to parse Atom and RSS format newsfeeds, how to generate valid newsfeeds and serve them efficiently, and how to automate blogging via web services based on the new Atom protocol and the older MetaWeblog API. The book also shows how to develop a complete blog client library that readers can use in their own applications. The second half of the book is devoted to a dozen blog apps--small but immediately useful example applications such as a community aggregator, a file distribution newsfeed, a blog cross-poster, an email-to-blog gateway, Ant tasks for blogging software builds, and more./ Manning Publications 31-07-2006 07-02-2007 Dave Johnson 300 -book 9780684868769 0684868768 us Emergence: The Connected Lives of Ants, Brains, Cities, and Software Emergence: The Connected Lives of Ants, Brains, Cities, and Software Paperback Urban/ Chaos & Systems/ History of Science/ Acoustics & Sound/ System Theory/ General & Reference/ Systems Analysis & Design/ Information Theory $15.00 $4.40 3.5 An individual ant, like an individual neuron, is just about as dumb as can be. Connect enough of them together properly, though, and you get spontaneous intelligence. Web pundit Steven Johnson explains what we know about this phenomenon with a rare lucidity in Emergence: The Connected Lives of Ants, Brains, Cities, and Software. Starting with the weird behavior of the semi-colonial organisms we call slime molds, Johnson details the development of increasingly complex and familiar behavior among simple components: cells, insects, and software developers all find their place in greater schemes./ / Most game players, alas, live on something close to day-trader time, at least when they're in the middle of a game--thinking more about their next move than their next meal, and usually blissfully oblivious to the ten- or twenty-year trajectory of software development. No one wants to play with a toy that's going to be fun after a few decades of tinkering--the toys have to be engaging now, or kids will find other toys./ Johnson has a knack for explaining complicated and counterintuitive ideas cleverly without stealing the scene. Though we're far from fully understanding how complex behavior manifests from simple units and rules, our awareness that such emergence is possible is guiding research across disciplines. Readers unfamiliar with the sciences of complexity will find Emergence an excellent starting point, while those who were chaotic before it was cool will appreciate its updates and wider scope. --Rob Lightner/ Scribner 27-08-2002 11-02-2007 Steven Johnson 288 -book 9780743241663 0743241665 us Mind Wide Open: Your Brain and the Neuroscience of Everyday Life Mind Wide Open: Your Brain and the Neuroscience of Everyday Life Paperback Consciousness & Thought/ Applied Psychology/ Neuropsychology/ Personality/ Physiology/ Neuroscience $15.00 $4.96 4 Given the opportunity to watch the inner workings of his own brain, Steven Johnson jumps at the chance. He reveals the results in Mind Wide Open, an engaging and personal account of his foray into edgy brain science. In the 21st century, Johnson observes, we have become used to ideas such as "adrenaline rushes" and "serotonin levels," without really recognizing that complex neurobiology has become a commonplace thing to talk about.
He sees recent laboratory revelations about the brain as crucial for understanding ourselves and our psyches in new, post-Freudian ways. Readers shy about slapping electrodes on their own temples can get a vicarious scientific thrill as Johnson tries out empathy tests, neurofeedback, and fMRI scans. The results paint a distinct picture of the author, and uncover general brain secrets at the same time. Memory, fear, love, alertness--all the multitude of states housed in our brains are shown to be the results of chemical and electrical interactions constantly fed and changed by input from our senses. Mind Wide Open both satisfies curiosity and provokes more questions, leaving readers wondering about their own gray matter. --Therese Littleton Scribner 03-05-2005 11-02-2007 Steven Johnson 288 -book 9780139376818 013937681X us The UNIX Programming Environment The UNIX Programming Environment Paperback History/ Unix/ History of Ideas $49.99 $14.04 4.5 Prentice Hall 03-02-1984 07-02-2007 Brian W. Kernighan/ Rob Pike 357 -book 9780312421434 0312421435 us No Logo: No Space, No Choice, No Jobs No Logo: No Space, No Choice, No Jobs Paperback Company Histories/ Labor Policy/ International/ Labor & Industrial Relations/ Production & Operations/ Advertising/ Global/ Ethics/ Anthropology/ Consumer Guides/ Media Studies/ Culture $15.00 $6.38 4 We live in an era where image is nearly everything, where the proliferation of brand-name culture has created, to take one hyperbolic example from Naomi Klein's No Logo, "walking, talking, life-sized Tommy [Hilfiger] dolls, mummified in fully branded Tommy worlds." Brand identities are even flourishing online, she notes--and for some retailers, perhaps best of all online: "Liberated from the real-world burdens of stores and product manufacturing, these brands are free to soar, less as the disseminators of goods or services than as collective hallucinations."/ In No Logo, Klein patiently demonstrates, step by step, how brands have become ubiquitous, not just in media and on the street but increasingly in the schools as well. (The controversy over advertiser-sponsored Channel One may be old hat, but many readers will be surprised to learn about ads in school lavatories and exclusive concessions in school cafeterias.) The global companies claim to support diversity, but their version of "corporate multiculturalism" is merely intended to create more buying options for consumers. When Klein talks about how easy it is for retailers like Wal-Mart and Blockbuster to "censor" the contents of videotapes and albums, she also considers the role corporate conglomeration plays in the process. How much would one expect Paramount Pictures, for example, to protest against Blockbuster's policies, given that they're both divisions of Viacom?/ Klein also looks at the workers who keep these companies running, most of whom never share in any of the great rewards. The president of Borders, when asked whether the bookstore chain could pay its clerks a "living wage," wrote that "while the concept is romantically appealing, it ignores the practicalities and realities of our business environment." Those clerks should probably just be grateful they're not stuck in an Asian sweatshop, making pennies an hour to produce Nike sneakers or other must-have fashion items. Klein also discusses at some length the tactic of hiring "permatemps" who can do most of the work and receive few, if any, benefits like health care, paid vacations, or stock options. 
While many workers are glad to be part of the "Free Agent Nation," observers note that, particularly in the high-tech industry, such policies make it increasingly difficult to organize workers and advocate for change./ But resistance is growing, and the backlash against the brands has set in. Street-level education programs have taught kids in the inner cities, for example, not only about Nike's abusive labor practices but about the astronomical markup in their prices. Boycotts have commenced: as one urban teen put it, "Nike, we made you. We can break you." But there's more to the revolution, as Klein optimistically recounts: "Ethical shareholders, culture jammers, street reclaimers, McUnion organizers, human-rights hacktivists, school-logo fighters and Internet corporate watchdogs are at the early stages of demanding a citizen-centered alternative to the international rule of the brands ... as global, and as capable of coordinated action, as the multinational corporations it seeks to subvert." No Logo is a comprehensive account of what the global economy has wrought and the actions taking place to thwart it. --Ron Hogan/ Picador 06-04-2002 11-02-2007 Naomi Klein 528 -book 0029236723101 0789723107 us Don't Make Me Think: A Common Sense Approach to Web Usability Don't Make Me Think: A Common Sense Approach to Web Usability Paperback Web Site Design/ Internet Publishing/ Interface Design $35.00 $14.98 5 Usability design is one of the most important--yet often least attractive--tasks for a Web developer. In Don't Make Me Think, author Steve Krug lightens up the subject with good humor and excellent, to-the-point examples./ The title of the book is its chief personal design premise. All of the tips, techniques, and examples presented revolve around users being able to surf merrily through a well-designed site with minimal cognitive strain. Readers will quickly come to agree with many of the book's assumptions, such as "We don't read pages--we scan them" and "We don't figure out how things work--we muddle through." Coming to grips with such hard facts sets the stage for Web design that then produces topnotch sites./ Using an attractive mix of full-color screen shots, cute cartoons and diagrams, and informative sidebars, the book keeps your attention and drives home some crucial points. Much of the content is devoted to proper use of conventions and content layout, and the "before and after" examples are superb. Topics such as the wise use of rollovers and usability testing are covered using a consistently practical approach./ This is the type of book you can blow through in a couple of evenings. But despite its conciseness, it will give you an expert's ability to judge Web design. You'll never form a first impression of a site in the same way again. --Stephen W. Plain/ Topics covered:/ • User patterns/ • Designing for scanning/ • Wise use of copy/ • Navigation design/ • Home page layout/ • Usability testing/ New Riders Press 13-10-2000 07-02-2007 Steve Krug 195 -book 9781585730407 1585730408 us Pocket Menu Reader China (Pocket Dictionaries) (Langenscheidt's Pocket Menu Reader) Pocket Menu Reader China (Pocket Dictionaries) (Langenscheidt's Pocket Menu Reader) Paperback Chinese/ Dictionaries; Polyglot/ Phrasebooks - General/ Dining $7.95 $4.98 1 Each Pocket Menu Reader is an indispensable gastronomic dictionary, phrasebook, and guidebook.
It includes more than 1,500 words with translations and pronunciations, comprehensive treatment of the country's cuisine, an alphabetical list of dishes and culinary terms, plus a gourmet's selection of recipes. Langenscheidt Publishers 15-11-2000 11-02-2007 Langenscheidt 189 -book 9780897500487 0897500482 us Tao of Jeet Kune Do Tao of Jeet Kune Do Paperback Contemporary/ New Age $16.95 $7.00 4.5 To watch Bruce Lee on film is an amazing experience. Those who have read Tao of Jeet Kune Do, however, know that Lee's prose can also be exhilarating. This praiseworthy and enduring bestseller (mainly written over six months when Lee was bedridden with back problems) compiles philosophical aphorisms, explanations of technique, and sketches by the master himself. Ohara Publications 07-02-1993 06-02-2007 Bruce Lee 208 -book 9780974175706 0974175706 us Getting Around in Chinese Getting Around in Chinese Paperback Study & Teaching $25.00 $13.25 3 Marco Liang & Co. 02-02-2003 11-02-2007 Marco Liang 669 -book 9780201570090 0201570092 us China: Empire of Living Symbols China: Empire of Living Symbols Hardcover Photo Essays/ Chinese/ Linguistics $39.90 $1,203.99 4.5 Perseus Books 11-02-1991 11-02-2007 Cecilia Lindqvist 423 -book 9781556432767 1556432763 us Ba Gua: Hidden Knowledge in the Taoist Internal Martial Art Ba Gua: Hidden Knowledge in the Taoist Internal Martial Art Paperback Reference $16.95 $6.49 4 North Atlantic Books 12-02-1998 11-02-2007 Hsing-Han Liu/ John Bracy 138 -book 9780738204314 0738204315 us The Cluetrain Manifesto: The End of Business as Usual The Cluetrain Manifesto: The End of Business as Usual Paperback Strategy & Competition/ Theory/ Customer Service/ Systems & Planning/ Consumerism/ Web Marketing/ Social Theory/ Peripherals $14.00 $1.79 4 How would you classify a book that begins with the salutation, "People of Earth..."? While the captains of industry might dismiss it as mere science fiction, The Cluetrain Manifesto is definitely of this day and age. Aiming squarely at the solar plexus of corporate America, authors Christopher Locke, Rick Levine, Doc Searls, and David Weinberger show how the Internet is turning business upside down. They proclaim that, thanks to conversations taking place on Web sites and message boards, and in e-mail and chat rooms, employees and customers alike have found voices that undermine the traditional command-and-control hierarchy that organizes most corporate marketing groups. "Markets are conversations," the authors write, and those conversations are "getting smarter faster than most companies." In their view, the lowly customer service rep wields far more power and influence in today's marketplace than the well-oiled front office PR machine./ The Cluetrain Manifesto began as a Web site (www.cluetrain.com) in 1999 when the authors, who have worked variously at IBM, Sun Microsystems, the Linux Journal, and NPR, posted 95 theses that pronounced what they felt was the new reality of the networked marketplace. For example, thesis no. 2: "Markets consist of human beings, not demographic sectors"; thesis no. 20: "Companies need to realize their markets are often laughing. At them"; thesis no. 62: "Markets do not want to talk to flacks and hucksters. They want to participate in the conversations going on behind the corporate firewall"; thesis no. 74: "We are immune to advertising. Just forget it."
The book enlarges on these themes through seven essays filled with dozens of stories and observations about how business gets done in America and how the Internet will change it all. While Cluetrain will strike many as loud and over the top, the message itself remains quite relevant and unique. This book is for anyone interested in the Internet and e-commerce, and is especially important for those businesses struggling to navigate the topography of the wired marketplace. All aboard! --Harry C. Edwards/ Perseus Books Group 09-01-2001 11-02-2007 Christopher Locke/ Rick Levine/ Doc Searls/ David Weinberger 190 -book 9780806906164 0806906162 us Chinese System Of Natural Cures Chinese System Of Natural Cures Paperback Herbal Remedies/ Basic Science/ History/ Chinese Medicine $11.95 $1.98 3 Discover traditional Chinese herbal healing formulas--and how to use the Four Energies, the Five Flavors, and the Four Movements to prescribe various herbal treatments, as well as acupuncture and other methods of pain relief. Detailed sections cover specific treatments of patients' complaints and recommended herbal treatments for diagnosed diseases, including high cholesterol, diabetes, heart and coronary problems, arthritis, allergies, and more./ Sterling 31-12-1994 11-02-2007 Henry C. Lu 160 -book 9780156799805 0156799804 us The Secret of the Golden Flower: A Chinese Book of Life The Secret of the Golden Flower: A Chinese Book of Life Paperback Taoism/ New Age/ Behavioral Psychology $12.00 $1.24 4 1955. The point of view established in this volume is that the spirit must lean on science as its guide in the world of reality, and that science must turn to the spirit for the meaning of life. This book lends us a new approach to the East, and it also strengthens the point of view evolving in the West with respect to the psyche. Wilhelm provides the reader with the text and explanation, while another section contains commentary by Jung. Harvest//HBJ Book 08-02-1962 08-02-2007 Tung-Pin Lu 149 -book 9780394717272 0394717279 us Acupuncture: The Ancient Chinese Art of Healing and How it Works Scientifically Acupuncture: The Ancient Chinese Art of Healing and How it Works Scientifically Paperback Acupuncture & Acupressure/ Healing/ China/ Pharmacology $11.00 $0.01 Dr. Felix Mann, President of the Medical Acupuncture Society, is one of the outstanding Western practitioners of the ancient Chinese art, which he has been using for some years in London. In this complete revision of his 1962 book -- over half of which is entirely new material -- he describes in detail for the first time how acupuncture works from a scientific point of view, explaining the neurophysiological mechanism involved as well as the basic principles and laws according to the theories of traditional Chinese medicine. Written for both the layman and the medical profession, the book illustrates its points with case histories drawn from Dr. Mann's own patients in England. Vintage 12-01-1973 06-02-2007 Felix Mann 256 -book 9780135974445 0135974445 us Agile Software Development, Principles, Patterns, and Practices Agile Software Development, Principles, Patterns, and Practices Hardcover C & C++ Windows Programming/ Object-Oriented Design/ Software Development/ Qualifying Textbooks - Winter 2007 $68.20 $38.50 4.5 Prentice Hall 15-10-2002 03-02-2007 Robert C.
Martin 552 -book 9780974514062 0974514063 us Pragmatic Version Control Using Subversion Pragmatic Version Control Using Subversion Paperback/ Illustrated Software Design/ Software Development/ Software Engineering $29.95 $23.94 4.5 This book covers the theory behind version control and how it can help developers become more efficient, work better as a team, and keep on top of software complexity. All projects need version control: it's the lifeblood of any project's infrastructure, yet half of all project teams in the U.S. don't use any version control at all. Many others don't use it well and end up experiencing time-consuming problems. Version control, done well, is your "undo" button for the project: nothing is final, and mistakes are easily rolled back. This book describes Subversion, the latest and hottest open source version control system, using a recipe-based approach that will get you up and running quickly--and correctly. Learn how to use Subversion the right way--the pragmatic way. With this book, you can:/ • Keep all project assets safe--not just source code--and never run the risk of losing a great idea/ • Know how to undo bad decisions--even directories and symlinks are versioned/ • Learn how to share code safely, and work in parallel for maximum efficiency/ • Install Subversion and organize, administer and back up your repository/ • Share code over a network with Apache, svnserve, or ssh/ • Create and manage releases, code branches, merges and bug fixes/ • Manage 3rd party code safely/ Now there's no excuse not to use professional-grade version control. Pragmatic Bookshelf 08-02-2005 07-02-2007 Mike Mason 207 -book 9780804836340 0804836345 us Chinese Character Fast Finder Chinese Character Fast Finder Paperback English (All)/ Chinese $19.95 $13.48 5 Chinese Character Fast Finder allows users to find Chinese characters based on their appearance alone, without knowing their pronunciation, radical or stroke count. This reference book has been designed for serious learners of Chinese as well as readers with an interest in written Chinese./ Convenient features include printed thumb-index marks for rapid access to any character; all the characters prescribed for the Chinese government's official HSK (Hanyu Shuiping Kaoshi) Language Proficiency Test, and simplified characters and their pinyin pronunciation. Tuttle Publishing 15-03-2005 07-02-2007 Laurence Matthews 256 -book 9780596009281 0596009283 us Firefox Hacks: Tips & Tools for Next-Generation Web Browsing (Hacks) Firefox Hacks: Tips & Tools for Next-Generation Web Browsing (Hacks) Paperback/ Illustrated Privacy/ Network Security/ Software Development/ Web Browsers/ Web Programming/ Internet Security/ Qualifying Textbooks - Winter 2007 $24.95 $12.74 4.5 Firefox Hacks is ideal for power users who want to take full advantage of Firefox from Mozilla, the next-generation web browser that is rapidly subverting Internet Explorer's once-dominant audience. It's also the first book that specifically dedicates itself to this technology. Firefox is winning such widespread approval for a number of reasons, including the fact that it lets users browse faster and more efficiently. Perhaps its most appealing strength, though, is its increased security--something that is covered in great detail in Firefox Hacks.
Clearly the web browser of the future, Firefox includes most of the features that browser users are familiar with, along with several new features, such as a bookmarks toolbar and tabbed pages that allow users to quickly switch among several web sites. Firefox Hacks offers all the valuable tips and tools you need to maximize the effectiveness of this hot web application. It's all covered, including how to customize its deployment, appearance, features, and functionality. You'll even learn how to install, use, and alter extensions and plug-ins. Aimed at clever people who may or may not be capable of basic programming tasks, this convenient resource describes 100 techniques for 100 strategies that effectively exploit Firefox. Or, put another way, readers of every stripe will find all the user-friendly tips, tools, and tricks they need to make a productive switch to Firefox. With Firefox Hacks, a superior and safer browsing experience is truly only pages away. The latest in O'Reilly's celebrated Hacks series, Firefox Hacks smartly complements other web-application titles such as Google Hacks and PayPal Hacks. O'Reilly Media 14-03-2005 07-02-2007 Nigel McFarlane 377 -book 9780321356703 0321356705 us Software Security: Building Security In (Addison-Wesley Software Security Series) Software Security: Building Security In (Addison-Wesley Software Security Series) Paperback Privacy/ Network Security/ Software Development $49.99 $6.40 5 Addison-Wesley Professional 23-01-2006 07-02-2007 1 Gary McGraw 448 -book 9780124848306 0124848303 us Programming for the Newton Using Windows Programming for the Newton Using Windows Paperback Object-Oriented Design/ Software Development/ Windows - General/ PCs $34.95 $9.46 5 Morgan Kaufmann Pub 09-02-1996 07-02-2007 Julie McKeehan/ Neil Rhodes 440 -book 0676251832068 0804832064 us Reading and Writing Chinese: A Guide to the Chinese Writing System Reading and Writing Chinese: A Guide to the Chinese Writing System Paperback Chinese/ Phrasebooks - General/ Southeast Asian/ Reading Skills/ Study & Teaching/ Writing Skills $24.95 $12.50 4 Reading and Writing Chinese has been the standard text for foreign students and teachers of the Chinese Writing System since Tuttle first published it over 20 years ago. This new, completely revised edition offers students a more convenient, efficient, and up-to-date introduction to the writing system./ Charles E. Tuttle Co. 09-02-1999 11-02-2007 William McNaughton/ Li Ying 368 -book 9780877736769 0877736766 us WAY OF CHAUNG TZU (Shambhala Pocket Classics) WAY OF CHAUNG TZU (Shambhala Pocket Classics) Paperback Taoism/ Eastern Philosophy/ Comparative Religion/ Paperback $6.00 $5.80 5 Working from existing translations, Thomas Merton composed a series of personal versions from his favorites among the classic sayings of Chuang Tzu, the most spiritual of the Chinese philosophers. Chuang Tzu, who wrote in the fourth and third centuries B.C., is the chief authentic historical spokesman for Taoism and its founder Lao Tzu (a legendary character known largely through Chuang Tzu's writings). Indeed it was because of Chuang Tzu and the other Taoist sages that Indian Buddhism was transformed, in China, into the unique vehicle we now call by its Japanese name — Zen. The Chinese sage abounds in wit, paradox, satire, and shattering insight into the true ground of being. Father Merton, no stranger to Asian thought, brings a vivid, modern idiom to the timeless wisdom of Tao. Illustrated with early Chinese drawings. 
Shambhala 30-06-1992 11-02-2007 Thomas Merton 240 -book 0636920926221 1565926226 us Cascading Style Sheets: The Definitive Guide Cascading Style Sheets: The Definitive Guide Paperback Web Site Design/ Internet Publishing/ Database Design/ Structured Design/ HTML - General/ Web Programming/ Web Authoring & Design $34.95 $1.70 4 Cascading Style Sheets can put a great deal of control and flexibility into the hands of a Web designer--in theory. In reality, however, varying browser support for CSS1 and lack of CSS2 implementation make CSS a very tricky topic. Cascading Style Sheets: The Definitive Guide is a comprehensive text that shows how to take advantage of the benefits of CSS while keeping compatibility issues in mind./ The book is very upfront about the spotty early browser support for CSS1 and the sluggish adoption of CSS2. However, enthusiasm for the technology spills out of the pages, making a strong case for even the most skeptical reader to give CSS a whirl and count on its future. The text covers CSS1 in impressive depth--not only the syntactical conventions but also more general concepts such as specificity and inheritance. Frequent warnings and tips alert the reader to browser-compatibility pitfalls./ Entire chapters are devoted to topics like units and values, visual formatting and positioning, and the usual text, fonts, and colors. This attention to both detail and architecture helps readers build a well-rounded knowledge of CSS and equips readers for a future of real-world debugging. Cascading Style Sheets honestly explains the reasons for avoiding an in-depth discussion of the still immature CSS2, but covers the general changes over CSS1 in a brief chapter near the end of the book./ When successfully implemented, Cascading Style Sheets result in much more elegant HTML that separates form from function. This fine guide delivers on its promise as an indispensable tool for CSS coders. --Stephen W. Plain/ Topics covered:/ • HTML with CSS/ • Selectors and structure/ • Units/ • Text manipulation/ • Colors and backgrounds/ • Boxes and borders/ • Visual formatting principles/ • Positioning/ • CSS2 preview/ • CSS case studies/ O'Reilly 15-05-2000 07-02-2007 Eric Meyer 470 -book 0636920001201 0596001207 us CSS Pocket Reference CSS Pocket Reference Paperback Web Graphics/ Web Site Design/ Internet Publishing/ HTML - General/ Pocket/ Web Programming/ Web Authoring & Design $9.95 $2.11 4 CSS (Cascading Style Sheets) is the W3C-approved method for enriching the visual presentation of web pages. CSS allows web pages to become more structural, and at the same time promises that they can have a more sophisticated look than ever before. With good implementations in Internet Explorer 5.0 and Opera 3.6, and 100% CSS1 support expected in Netscape's Mozilla browser, signs are that CSS is rapidly becoming a useful, reliable, and powerful tool for web authors./ The CSS Pocket Reference briefly introduces CSS and then lists all CSS1 properties, plus the CSS1 pseudo-elements and pseudo-classes. Since browser incompatibility is the biggest obstacle to CSS adoption, we've also included a comprehensive guide to how the browsers have implemented support for CSS1. For anyone who wants to correctly implement CSS, this is a handy condensed reference to all the details in the larger volume, Cascading Style Sheets: The Definitive Guide./ O'Reilly 16-05-2001 07-02-2007 Eric A.
Meyer 96 -book 0752064712459 073571245X us Eric Meyer on CSS: Mastering the Language of Web Design Eric Meyer on CSS: Mastering the Language of Web Design Paperback Web Site Design/ Internet Publishing/ HTML - General/ jp-unknown1/ Qualifying Textbooks - Winter 2007 $45.00 $9.99 4.5 There are several other books on the market that serve as in-depth technical guides or reference books for CSS. None, however, take a more hands-on approach and use practical examples to teach readers how to solve the problems they face in designing with CSS - until now. Eric Meyer provides a variety of carefully crafted projects that teach how to use CSS and why particular methods were chosen. The web site includes all of the files needed to complete the tutorials in the book. In addition, bonus information will be posted. New Riders Press 28-06-2002 03-02-2007 Eric A. Meyer 352 -book 9780865681743 0865681740 us Xing Yi Nei Gong: Xing Yi Health Maintenance and Internal Strength Development Xing Yi Nei Gong: Xing Yi Health Maintenance and Internal Strength Development Paperback $21.95 $14.05 4.5 This is the most complete book on the art of xing yi (hsing Yi) available. It includes the complete xing yi history and lineage going back eight generations; manuscripts handed down from famous practitioners Dai Long Bang and Li Neng Ran; 16 health maintenance and power development exercises; qigong (chi kung) exercises; xing yi long spear power training exercises; and more. Unique Publications 10-02-1998 03-02-2007 Dan Miller/ Tim Cartmell 200 -book 9781883175009 1883175003 us Liang Zhen Pu Eight Diagram Palm Liang Zhen Pu Eight Diagram Palm Paperback Taichi/ jp-unknown3 $17.95 $14.99 5 High View Pubns 04-02-1993 11-02-2007 Li Zi Ming 168 -book 9780609810347 0609810340 us Bhagavad Gita: A New Translation Bhagavad Gita: A New Translation Paperback Eastern/ Bhagavad Gita $13.95 $7.45 3.5 On the list of the greatest spiritual books of all time, the Bhagavad Gita resides permanently in the top echelon. This poem of patently Indian genius sprouted an immense tree of devotional, artistic, and philosophical elaboration in the subcontinent. The scene is a battlefield with the prince Arjuna pitted against his own family, but no sooner does the poem begin than the action reverts inward. Krishna, Arjuna's avatar and spiritual guide, points the way to the supreme wisdom and perfect freedom that lie within everyone's reach. Worship and be faithful, meditate and know reality--these make up the secret of life and lead eventually to the realization that the self is the root of the world. In this titular translation, Stephen Mitchell's rhythms are faultless, making music of this ancient "Song of the Blessed One." Savor his rendition, but nibble around the edges of his introduction. In a bizarre mixture of praise and condescension, Mitchell disregards two millennia of Indian commentary, seeking illumination on the text from Daoism and Zen, with the Gita coming up just shy of full spiritual merit. Perhaps we should take it from Gandhi, who used the Gita as a handbook for life, that it nourishes on many levels. --Brian Bruya Three Rivers Press 27-08-2002 11-02-2007 Stephen Mitchell 224 -book 9780060923211 0060923210 us The Gospel According to Jesus The Gospel According to Jesus Paperback Classics/ New Testament/ Study/ Inspirational/ Christology $14.00 $2.62 3.5 A dazzling presentation of the life and teachings of Jesus by the eminent scholar and translator Stephen Mitchell.
Harper Perennial 31-03-1993 08-02-2007 Stephen Mitchell 320 -book 9780690012903 069001290X us Flower, Moon, Snow: A Book of Haiku Flower, Moon, Snow: A Book of Haiku Library Binding 20th Century/ Japanese & Haiku/ United States $12.89 $4.40 Crowell 04-02-1977 03-02-2007 Kazue Mizumura 48 -book 9780060922245 0060922249 us Care of the Soul : A Guide for Cultivating Depth and Sacredness in Everyday Life Care of the Soul : A Guide for Cultivating Depth and Sacredness in Everyday Life Paperback Personal Transformation/ New Age/ Spiritual/ Pastoral Theology $14.00 $0.01 4 Care of the Soul is considered to be one of the best primers for soul work ever written. Thomas Moore, an internationally renowned theologian and former Catholic monk, offers a philosophy for living that involves accepting our humanity rather than struggling to transcend it. By nurturing the soul in everyday life, Moore shows how to cultivate dignity, peace, and depth of character. For example, in addressing the importance of daily rituals he writes, "Ritual maintains the world's holiness. As in a dream a small object may assume significance, so in a life that is animated by ritual there are no insignificant things." This is the eloquence that helped reintroduce the sacred into everyday language and contemporary values. Harper Paperbacks 26-01-1994 03-02-2007 Thomas Moore 336 -book 9780416543506 0416543502 us Pooh's Workout Book Pooh's Workout Book Hardcover Parodies/ British $13.44 5 Methuen young books 24-10-1985 11-02-2007 Ethan Mordden 176 -book 9780596007652 0596007655 us Ambient Findability Ambient Findability Paperback/ Illustrated Web Site Design/ Database Design/ Internet/ Web Programming/ Web Authoring & Design/ Qualifying Textbooks - Winter 2007 $29.95 $14.94 4 How do you find your way in an age of information overload? How can you filter streams of complex information to pull out only what you want? Why does it matter how information is structured when Google seems to magically bring up the right answer to your questions? What does it mean to be "findable" in this day and age? This eye-opening new book examines the convergence of information and connectivity. Written by Peter Morville, author of the groundbreaking Information Architecture for the World Wide Web, the book defines our current age as a state of unlimited findability. In other words, anyone can find anything at any time. Complete navigability./ Morville discusses the Internet, GIS, and other network technologies that are coming together to make unlimited findability possible. He explores how the melding of these innovations impacts society, since Web access is now a standard requirement for successful people and businesses. But before he does that, Morville looks back at the history of wayfinding and human evolution, suggesting that our fear of being lost has driven us to create maps, charts, and now, the mobile Internet./ The book's central thesis is that information literacy, information architecture, and usability are all critical components of this new world order. Hand in hand with that is the contention that only by planning and designing the best possible software, devices, and Internet, will we be able to maintain this connectivity in the future. Morville's book is highlighted with full color illustrations and rich examples that bring his prose to life./ Ambient Findability doesn't preach or pretend to know all the answers. Instead, it presents research, stories, and examples in support of its novel ideas. 
Are we truly at a critical point in our evolution where the quality of our digital networks will dictate how we behave as a species? Is findability indeed the primary key to a successful global marketplace in the 21st century and beyond? Peter Morville takes you on a thought-provoking tour of these memes and more -- ideas that will not only fascinate but will stir your creativity in practical ways that you can apply to your work immediately./ "A lively, enjoyable and informative tour of a topic that's only going to become more important."
--David Weinberger, Author, Small Pieces Loosely Joined and The Cluetrain Manifesto/ "I envy the young scholar who finds this inventive book, by whatever strange means are necessary. The future isn't just unwritten--it's unsearched."
--Bruce Sterling, Writer, Futurist, and Co-Founder, The Electronic Frontier Foundation/ "Search engine marketing is the hottest thing in Internet business, and deservedly so. Ambient Findability puts SEM into a broader context and provides deeper insights into human behavior. This book will help you grow your online business in a world where being found is not at all certain."
--Jakob Nielsen, Ph.D., Author, Designing Web Usability: The Practice of Simplicity/ "Information that's hard to find will remain information that's hardly found--from one of the fathers of the discipline of information architecture, and one of its most experienced practitioners, come penetrating observations on why findability is elusive and how the act of seeking changes us."
--Steve Papa, Founder and Chairman, Endeca/ "Whether it's a fact or a figure, a person or a place, Peter Morville knows how to make it findable. Morville explores the possibilities of a world where everything can always be found--and the challenges in getting there--in this wide-ranging, thought-provoking book."
--Jesse James Garrett, Author, The Elements of User Experience/ "It is easy to assume that current searching of the World Wide Web is the last word in finding and using information. Peter Morville shows us that search engines are just the beginning. Skillfully weaving together information science research with his own extensive experience, he develops for the reader a feeling for the near future when information is truly findable all around us. There are immense implications, and Morville's lively and humorous writing brings them home."
--Marcia J. Bates, Ph.D., University of California Los Angeles/ "I've always known that Peter Morville was smart. After reading Ambient Findability, I now know he's (as we say in Boston) wicked smart. This is a timely book that will have lasting effects on how we create our future."
--Jared Spool, Founding Principal, User Interface Engineering/ "In Ambient Findability, Peter Morville has put his mind and keyboard on the pulse of the electronic noosphere. With tangible examples and lively writing, he lays out the challenges and wonders of finding our way in cyberspace, and explains the mutually dependent evolution of our changing world and selves. This is a must read for everyone and a practical guide for designers."
--Gary Marchionini, Ph.D., University of North Carolina/ "Find this book! Anyone interested in making information easier to find, or understanding how finding and being found is changing, will find this thoroughly researched, engagingly written, literate, insightful and very, very cool book well worth their time. Myriad examples from rich and varied domains and a valuable idea on nearly every page. Fun to read, too!"
--Joseph Janes, Ph.D., Founder, Internet Public Library/ O'Reilly Media 01-10-2005 07-02-2007 Peter Morville 188 -book 0636920924180 1565924185 us Java Threads, Second Edition Java Threads, Second Edition Paperback Parallel Computing/ Java $34.95 $0.47 3.5 Building sophisticated Java applets means learning about threading--if you need to read data from a network, for example, you can't afford to let a delay in its delivery lock up your entire applet. Java Threads introduces the Java threading API and uses non-computing analogies--such as scenarios involving bank tellers--to explain the need for synchronization and the dangers of deadlock. Scott Oaks and Henry Wong follow up their high-level examples with more detailed discussions on building a thread scheduler in Java, dealing with advanced synchronization issues, and handling exceptions. O'Reilly 20-01-1999 07-02-2007 Scott Oaks/ Henry Wong 336 -book 9780312275631 0312275633 us Awareness: The Key to Living in Balance Awareness: The Key to Living in Balance Paperback Meditation/ Mysticism/ Self-Help/ Spiritualism/ Osho/ Personal Transformation $11.95 $7.61 4.5 Underlying all meditation techniques, including martial arts--and in fact underlying all great athletic performances--is a quality of being awake and present to the moment, a quality that Osho calls awareness. Once we can identify and understand what this quality of awareness is, we have the key to self-mastery in virtually every area of our lives. According to great masters like Lao Tzu or Buddha, most of us move through our lives like sleepwalkers. Never really present in what we are doing, never fully alert to our environment, and not even aware of what motivates us to do and say the things we do. At the same time, all of us have experienced moments of awareness--or awakening, to use another word--in extraordinary circumstances. On the road, in a sudden and unexpected accident, time seems to stop and one is suddenly aware of every movement, every sound, every thought. Or in moments that touch us deeply--welcoming a new baby into the world for the first time, or being with someone at the moment of death. Awareness, says Osho, is the key to being self-directed, centered, and free in every aspect of our lives. In this book, Osho teaches how to live life more attentively, mindfully, and meditatively, with love, caring and consciousness. OSHO challenges readers to examine and break free of the conditioned belief systems and prejudices that limit their capacity to live life in all its richness. He has been described by the Sunday Times of London as one of the "1000 Makers of the 20th Century" and by Sunday Mid-Day (India) as one of the ten people--along with Gandhi, Nehru, and Buddha--who have changed the destiny of India. More than a decade after his death in 1990, the influence of his teachings continues to expand, reaching seekers of all ages in virtually every country of the world. St. Martin's Griffin 10-12-2001 03-02-2007 Osho 192 -book 9781580632256 1580632254 us Tao: The Pathless Path Tao: The Pathless Path Paperback Taoism/ Osho $12.95 $7.61 5 In his commentaries on five parables from the Leih Tzu, Osho brings a fresh and contemporary interpretation to the ancient wisdom of Tao.
Leih Tzu was a well-known Taoist master in the fourth century B.C., and his sly critiques of Confucius provide abundant opportunities for the reader to explore the contrasts between the rational and irrational, the male and female, the structured and the spontaneous. "Who Is Really Happy" uses the discovery of a human skull on the roadside to probe into the question of immortality and how misery arises out of the existence of the ego. "A Man Who Knows How to Console Himself" looks beneath the apparent cheerfulness of a wandering monk and asks if there is really a happiness that endures through life's ups and downs. "No Regrets" is a parable about the difference between the knowledge that is gathered from the outside and the "knowing" that arises from within. "No Rest for the Living" uses a dialogue between a despondent seeker and his master to reveal the limits of philosophy and the crippling consequences of living for the sake of some future goal. "Best Be Still, Best Be Empty" discusses the difference between the path of the will, the via affirmativa of Christianity, Judaism, and Islam, versus the path of the mystic, the via negativa of Buddha and Lao Tzu. A Q&A section addresses how Taoist understanding applies to everyday life in concrete, practical terms. Renaissance Books 23-02-2002 11-02-2007 Osho 192 -book 9780865681729 0865681724 us Fundamentals of Pa Kua Chan, Vol. 1 Fundamentals of Pa Kua Chan, Vol. 1 Paperback $19.95 $9.00 4.5 Unique Publications 01-02-1999 11-02-2007 Bok Nam Park/ Dan Miller 204 -book B000AMLXHM us THE EMPEROR'S NEW MIND: CONCERNING COMPUTERS, MINDS, AND THE LAWS OF PHYSICS..."Ranks among the most innovative and exciting science books to be published in the last forty years." THE EMPEROR'S NEW MIND: CONCERNING COMPUTERS, MINDS, AND THE LAWS OF PHYSICS..."Ranks among the most innovative and exciting science books to be published in the last forty years." Paperback $1.89 PENGUIN BOOKS 08-02-1990 08-02-2007 ROGER PENROSE -book 9780936085241 093608524X us The Fine Art of Technical Writing The Fine Art of Technical Writing Paperback Technical $9.95 $0.95 4.5 This slender volume for the beginning technical writer doesn't delve very deeply into its subject, but The Fine Art of Technical Writing does make some nice points. Most appealing and useful is the book's premise: though its subject matter can be dry, "technical writing is a creative act." Author Carol Rosenblum Perry likens the technical writer to a ceramist, recommending that he or she get as much down on paper (or computer) as possible for the first draft, then think of that "rough text as a big, shapeless lump of clay" to be sculpted. Next is a more oblique analogy to figurative drawing. Perry urges the technical writer to consider the writing's "skeleton" (order), "body mass" (conciseness), and "muscle tone" (vigor). Finally, technical writing is compared to making music: "Writing, like music, depends on its dynamics ... varying degrees of 'loudness' and 'softness.'" Blue Heron Publishing 11-02-1991 11-02-2007 Carol Rosenblum Perry 111 -book 9780596004484 0596004486 us Version Control with Subversion Version Control with Subversion Paperback/ Illustrated Software Development $34.95 $18.69 4.5 One of the greatest frustrations in most software projects is version control: the art of managing changes to information.
Today's increasingly fast pace of software development--as programmers make small changes to software one day only to undo them the next--has only heightened the problem; consecutive work on code or single-programmer software is a rare sight these days. Without careful attention to version control, concurrent and collaborative work can create more headaches than it solves. This is where Subversion comes into play. Written by members of the Subversion open source development team, Version Control with Subversion introduces the powerful new versioning tool designed to be the successor to the Concurrent Versions System, or CVS. CVS users will find the "look and feel" of Subversion comfortably familiar, but under the surface it's far more flexible, robust, and usable, and more importantly, it improves on CVS's more notable flaws. The book begins with a general introduction to Subversion, the basic concepts behind version control, and a guided tour of Subversion's capabilities and structure. With thorough attention to detail, the authors cover every aspect of installing and configuring Subversion for managing a programming project, documentation, or any other team-based endeavor. Later chapters cover the more complex topics of branching, repository administration, and other advanced features such as properties, externals, and access control. The book ends with reference material and appendices covering a number of useful topics such as a complete Subversion reference and troubleshooting guide. Version Control with Subversion aims to be useful to readers of widely different backgrounds, from those with no previous experience in version control to experienced sysadmins. If you've never used version control, you'll find everything you need to get started in this book. And if you're a seasoned CVS pro, this book will help you make a painless leap into Subversion. O'Reilly Media 22-06-2004 07-02-2007 C. Michael Pilato/ Ben Collins-Sussman/ Brian W. Fitzpatrick 304 -book 9780060958404 0060958405 us Words and Rules: The Ingredients of Language Words and Rules: The Ingredients of Language Paperback Logic & Language/ Grammar/ Linguistics/ Neuroscience/ Cognitive $15.00 $3.50 4 Human languages are capable of expressing a literally endless number of different ideas. How do we manage it--so effortlessly that we scarcely ever stop to think about it? In Words and Rules: The Ingredients of Language, a look at the simple concepts that we use to devise works as complex as love sonnets and tax laws, renowned neuroscientist and linguist Steven Pinker shows us how. The latest linguistic research suggests that each of us stores a limited (though large) number of words and word-parts in memory and manipulates them with a much smaller number of rules to produce every writing and utterance, and Pinker explains every step of the way with engaging good humor./ Pinker's enthusiasm for the subject infects the reader, particularly as he emphasizes the relation between how we communicate and how we think. What does it mean that a small child who has never heard the word wug can tell a researcher that when one wug meets another, there are two wugs? Some rule must be telling the child that English plurals end in -s, which also explains mistakes like mouses. Is our communication linked inextricably with our thinking? Pinker says yes, and it's hard to disagree.
Words and Rules is an excellent introduction to and overview of current thinking about language, and will greatly reward the careful reader with new ways of thinking about how we think, talk, and write. --Rob Lightner/ Harper Perennial 15-01-2000 03-02-2007 Steven Pinker 368 -book 9781556434303 1556434308 us Healing With Whole Foods: Asian Traditions and Modern Nutrition Healing With Whole Foods: Asian Traditions and Modern Nutrition (3rd Edition) 3rd Paperback Healthy/ Macrobiotics/ Healing/ Naturopathy/ Family Health/ Diet Therapy/ Whole Foods/ Chinese Medicine/ Healthy Living $35.00 $22.00 5 Used as a reference by students of acupuncture, this is a hefty, truly comprehensive guide to the theory and healing power of Chinese medicine. It's also a primer on nutrition--including facts about green foods, such as spirulina and blue-green algae, and the "regeneration diets" used by cancer patients and arthritics--along with an inspiring cookbook with more than 300 mostly vegetarian, nutrient-packed recipes./ The information on Chinese medicine is useful for helping to diagnose health imbalances, especially nascent illnesses. It's smartly paired with the whole-foods program because the Chinese have attributed various health-balancing properties to foods, so you can tailor your diet to help alleviate symptoms of illness. For example, Chinese medicine dictates that someone with low energy and a pale complexion (a yin deficiency) would benefit from avoiding bitter foods and increasing "sweet" foods such as soy, black sesame seeds, parsnips, rice, and oats. (Note that the Chinese definition of sweet foods is much different from the American one!)/ Pitchford says in his dedication that he hopes the reader finds "healing, awareness, and peace" from following his program. The diet is certainly ascetic by American standards (no alcohol, caffeine, white flour, fried foods, or sugar, and a minimum of eggs and dairy) but the reasons he gives for avoiding these "negative energy" foods are compelling. From the adrenal damage imparted by coffee to immune dysfunction brought on by excess refined sugar, Pitchford spurs you to rethink every dietary choice and its ultimate influence on your health. Without being alarmist, he adds dietary tips for protecting yourself against the dangers of modern life, including neutralizing damage from water fluoridation (thyroid and immune-system problems may result; fluoride is a carcinogen). There's further reading on food combining, female health, heart disease, pregnancy, fasting, and weight loss. Overall, this is a wonderful book for anyone who's serious about strengthening his or her body from the inside out. --Erica Jorgensen/ North Atlantic Books 12-02-2002 11-02-2007 Paul Pitchford 750 -book 9780596002633 0596002637 us Practical RDF Practical RDF Paperback/ Illustrated XML/ Web Programming/ Qualifying Textbooks - Winter 2007 $39.95 $25.99 3.5 The Resource Description Framework (RDF) is a structure for describing and interchanging metadata on the Web--anything from library catalogs and worldwide directories to bioinformatics, Mozilla internal data structures, and knowledge bases for artificial intelligence projects. RDF provides a consistent framework and syntax for describing and querying data, making it possible to share website descriptions more easily. RDF's capabilities, however, have long been shrouded by its reputation for complexity and a difficult family of specifications.
Practical RDF breaks through this reputation with immediate and solvable problems to help you understand, master, and implement RDF solutions. Practical RDF explains RDF from the ground up, providing real-world examples and descriptions of how the technology is being used in applications like Mozilla, FOAF, and Chandler, as well as infrastructure you can use to build your own applications. This book cuts to the heart of the W3C's often obscure specifications, giving you tools to apply RDF successfully in your own projects. The first part of the book focuses on the RDF specifications. After an introduction to RDF, the book covers the RDF specification documents themselves, including RDF Semantics and Concepts and Abstract Model specifications, RDF constructs, and the RDF Schema. The second section focuses on programming language support, and the tools and utilities that allow developers to review, edit, parse, store, and manipulate RDF//XML. Subsequent sections focus on RDF's data roots, programming and framework support, and practical implementation and use of RDF and RDF//XML. If you want to know how to apply RDF to information processing, Practical RDF is for you. Whether your interests lie in large-scale information aggregation and analysis or in smaller-scale projects like weblog syndication, this book will provide you with a solid foundation for working with RDF. O'Reilly Media 07-02-2003 07-02-2007 Shelley Powers 331 -book 9781556433023 1556433026 us Cheng Hsin: The Principles of Effortless Power Cheng Hsin: The Principles of Effortless Power Paperback New Age $16.95 $11.44 4.5 The basic text of one of the geniuses of martial arts in America. North Atlantic Books 01-02-1999 06-02-2007 Peter Ralston 184 -book 9781583941591 1583941592 us Zen Body-Being: An Enlightened Approach to Physical Skill, Grace, and Power Zen Body-Being: An Enlightened Approach to Physical Skill, Grace, and Power Paperback Meditation/ Personal Transformation/ Zen/ Zen Philosophy/ Physical Education $16.95 $10.51 5 In this inspiring guide, Peter Ralston presents a program of "physical education" for anyone interested in body improvement. Using simple, clear language to demystify the Zen mindset, he draws on more than three decades of experience teaching students and apprentices worldwide who have applied his body-being approach. More of a transformative guide than a specific list of exercises devoted to any particular physical approach, Zen Body-Being explains how to create a state of mental control, enhanced feeling-awareness, correct structural alignment, increased spatial acuity, and even a greater interactive presence. Exercises are simple, often involving feeling-imagery or a kind of meditative awareness that has a profound and sometimes instant effect. Where similar guides teach readers what to do, this challenging book by the man whose insights Dan Millman has said “speak to us all,” teaches readers how to be./ North Atlantic Books, Frog Ltd. 27-07-2006 18-02-2007 Peter Ralston/ Laura Ralston 200 -book 0639785334866 0071377646 us Schaum's Outline of Chinese Grammar Schaum's Outline of Chinese Grammar Paperback Chinese/ Grammar/ Vocabulary/ Study Guides/ Reference/ Schaum's Outlines $17.95 $10.15 4.5 Schaum's Outline of Chinese Grammar is designed to help beginning and intermediate students of Mandarin Chinese develop and enhance their knowledge of Chinese grammar. Chinese morphology can be intimidating to students.
By simplifying the learning process, this practical book enriches the student's understanding of the Chinese language./ The accessible summary of the major features of Chinese grammar, complete with clear explanations of terms and usage, is especially helpful to students. The book features 200 sets of practice exercises as well as Chinese-English and English-Chinese glossaries. It serves as a much-needed supplement to textbooks and class materials currently being used in first- and second-year college-level courses./ McGraw-Hill 13-02-2004 11-02-2007 Claudia Ross 304 -book 9780439784542 0439784549 us Harry Potter and the Half-Blood Prince Harry Potter and the Half-Blood Prince (Book 6) 6 Hardcover/ Unabridged Humorous/ Science Fiction, Fantasy, & Magic/ Contemporary/ Hardcover/ School $29.99 $3.45 4.5 The long-awaited, eagerly anticipated, arguably over-hyped Harry Potter and the Half-Blood Prince has arrived, and the question on the minds of kids, adults, fans, and skeptics alike is, "Is it worth the hype?" The answer, luckily, is simple: yep. A magnificent spectacle more than worth the price of admission, Harry Potter and the Half-Blood Prince will blow you away. However, given that so much has gone into protecting the secrets of the book (including armored trucks and injunctions), don't expect any spoilers in this review. It's much more fun not knowing what's coming--and in the case of Rowling's delicious sixth book, you don't want to know. Just sit tight, despite the earth-shattering revelations that will have your head in your hands as you hope the words will rearrange themselves into a different story. But take one warning to heart: do not open Harry Potter and the Half-Blood Prince until you have first found a secluded spot, safe from curious eyes, where you can tuck in for a good long read. Because once you start, you won't stop until you reach the very last page./ A darker book than any in the series thus far, with a level of sophistication belying its genre, Harry Potter and the Half-Blood Prince moves the series into murkier waters and marks the arrival of Rowling onto the adult literary scene. While she has long been praised for her cleverness and wit, the strength of Book 6 lies in her subtle development of key characters, as well as her carefully nuanced depiction of a community at war. In Harry Potter and the Half-Blood Prince, no one and nothing is safe, including preconceived notions of good and evil and of right and wrong. With each book in her increasingly remarkable series, fans have nervously watched J.K. Rowling raise the stakes; gone are the simple delights of butterbeer and enchanted candy, and days when the worst ailment could be cured by a bite of chocolate. A series that began as a colorful lark full of magic and discovery has become a dark and deadly war zone. But this should not come as a shock to loyal readers. Rowling readied fans with Harry Potter and the Goblet of Fire and Harry Potter and the Order of the Phoenix by killing off popular characters and engaging the young students in battle. Still, there is an unexpected bleakness from the start of Book 6 that casts a mean shadow over Quidditch games, silly flirtations, and mountains of homework. Ready or not, the tremendous ending of Harry Potter and the Half-Blood Prince will leave stunned fans wondering what great and terrible events await in Book 7 if this sinister darkness is meant to light the way.
--Daphne Durham/
Why We Love Harry
Favorite Moments from the Series
There are plenty of reasons to love Rowling's wildly popular series--no doubt you have several dozen of your own. Our list features favorite moments, characters, and artifacts from the first five books. Keep in mind that this list is by no means exhaustive (what we love about Harry could fill ten books!) and does not include any of the spectacular revelatory moments that would spoil the books for those (few) who have not read them. Enjoy./ Harry Potter and the Sorcerer's Stone/ / * Harry's first trip to the zoo with the Dursleys, when a boa constrictor winks at him. / * When the Dursleys' house is suddenly besieged by letters for Harry from Hogwarts. Readers learn how much the Dursleys have been keeping from Harry. Rowling does a wonderful job in displaying the lengths to which Uncle Vernon will go to deny that magic exists. / * Harry's first visit to Diagon Alley with Hagrid. Full of curiosities and rich with magic and marvel, Harry's first trip includes a trip to Gringotts and Ollivanders, where Harry gets his wand (holly and phoenix feather) and discovers yet another connection to He-Who-Must-Not-Be-Named. This moment is the reader's first full introduction to Rowling's world of witchcraft and wizards./ * Harry's experience with the Sorting Hat./ Harry Potter and the Chamber of Secrets/ / * The de-gnoming of the Weasleys' garden. Harry discovers that even wizards have chores--gnomes must be grabbed (ignoring angry protests "Gerroff me! Gerroff me!"), swung about (to make them too dizzy to come back), and tossed out of the garden--this delightful scene highlights Rowling's clever and witty genius. / * Harry's first experience with a Howler, sent to Ron by his mother. / * The Dueling Club battle between Harry and Malfoy. Gilderoy Lockhart starts the Dueling Club to help students practice spells on each other, but he is not prepared for the intensity of the animosity between Harry and Draco. Since they are still young, their minibattle is innocent enough, including tickling and dancing charms./ Harry Potter and the Prisoner of Azkaban/ / * Ron's attempt to use a telephone to call Harry at the Dursleys'. / * Harry's first encounter with a Dementor on the train (and just about any other encounter with Dementors). Harry's brush with the Dementors is terrifying and prepares Potter fans for a darker, scarier book. / * Harry, Ron, and Hermione's behavior in Professor Trelawney's Divination class. Some of the best moments in Rowling's books occur when she reminds us that the wizards-in-training at Hogwarts are, after all, just children. Clearly, even at a school of witchcraft and wizardry, classes can be boring and seem pointless to children. / * The Boggart lesson in Professor Lupin's classroom. / * Harry, Ron, and Hermione's knock-down confrontation with Snape./ Harry Potter and the Goblet of Fire/ / * Hermione's disgust at the reception for the veela (Bulgarian National Team Mascots) at the Quidditch World Cup. Rowling's fourth book addresses issues about growing up--the dynamic between the boys and girls at Hogwarts starts to change. Nowhere is this more plain than in the hilarious scene in which magical cheerleaders nearly convince Harry and Ron to jump from the stands to impress them. / * Viktor Krum's crush on Hermione--and Ron's objection to it. / * Malfoy's "Potter Stinks" badge. / * Hermione's creation of S.P.E.W., the intolerant bigotry of the Death Eaters, and the danger of the Triwizard Tournament.
Add in the changing dynamics between girls and boys at Hogwarts, and suddenly Rowling's fourth book has a weight and seriousness not as present in early books in the series. Candy and tickle spells are left behind as the students tackle darker, more serious issues and take on larger responsibilities, including the knowledge of illegal curses./ Harry Potter and the Order of the Phoenix/ / / * Harry's outburst to his friends at No. 12 Grimmauld Place. A combination of frustration over being kept in the dark and fear that he will be expelled fuels much of Harry's anger, and it all comes out at once, directly aimed at Ron and Hermione. Rowling perfectly portrays Harry's frustration at being too old to shirk responsibility, but too young to be accepted as part of the fight that he knows is coming. / * Harry's detention with Professor Umbridge. Rowling shows her darker side, leading readers to believe that Hogwarts is no longer a safe haven for young wizards. Dolores represents a bureaucratic tyrant capable of real evil, and Harry is forced to endure their private battle of wills alone. / * Harry and Cho's painfully awkward interactions. Rowling clearly remembers what it was like to be a teenager. / * Harry's Occlumency lessons with Snape. / * Dumbledore's confession to Harry./ / Magic, Mystery, and Mayhem: A Conversation with J.K. Rowling
/ / "I am an extraordinarily lucky person, doing what I love best in the world. I'm sure that I will always be a writer. It was wonderful enough just to be published. The greatest reward is the enthusiasm of the readers."--J.K. Rowling/ Find out more about Harry's creator in our exclusive interview with J.K. Rowling./ 

Did You Know?/ / The Little White Horse was J.K. Rowling's favorite book as a child./ / Jane Austen is Rowling's favorite author./ / Roddy Doyle is Rowling's favorite living writer./ A Few Words from Mary GrandPré
/ / "When I illustrate a cover or a book, I draw upon what the author tells me; that's how I see my responsibility as an illustrator. J.K. Rowling is very descriptive in her writing--she gives an illustrator a lot to work with. Each story is packed full of rich visual descriptions of the atmosphere, the mood, the setting, and all the different creatures and people. She makes it easy for me. The images just develop as I sketch and retrace until it feels right and matches her vision." Check out more Harry Potter art from illustrator Mary GrandPré. 
/ Scholastic, Inc. 16-07-2005 18-02-2007 J. K. Rowling 672 -book 9780767900027 0767900022 us The Illuminated Rumi The Illuminated Rumi Hardcover Middle Eastern/ Inspirational & Religious/ Eastern European/ Mysticism/ Folk Art/ Ancient, Classical & Medieval $30.00 $13.99 4.5 Rise up nimbly and go on your strange journey to the ocean of meanings.../ / In the mid-thirteenth century, in a dusty marketplace in Konya, Turkey, a city where Muslim, Christian, Hindu, and Buddhist travelers mingled, Jelaluddin Rumi, a popular philosopher and scholar, met Shams of Tabriz, a wandering dervish.  Their meeting forever altered the course of Rumi's life and influenced the mystical evolution of the planet.  The bond they formed was everlasting--a powerful transcendent friendship that would flow through Rumi as some of the world's best-loved ecstatic poetry./ / Rumi's passionate, playful poems find and celebrate sacred life in everyday existence.  They speak across all traditions, to all peoples, and today his relevance and popularity continue to grow.  In The Illuminated Rumi, Coleman Barks, widely regarded as the world's premier translator of Rumi's writings, presents some of his most brilliant work, including many new translations.  To complement Rumi's universal vision, Michael Green has worked the ancient art of illumination into a new, visually stunning form that joins typography, original art, old masters, photographs, and prints with sacred images from around the world./ / The Illuminated Rumi is a truly groundbreaking collaboration that interweaves word and image: a magnificent meeting of ancient tradition and modern interpretation that uniquely captures the spiritual wealth of Rumi's teachings.  Coleman Barks's wise and witty commentary, together with Michael Green's art, makes this a classic guide to the life of the soul for a whole new generation of seekers. Broadway 13-10-1997 11-02-2007 Jalal Al-Din Rumi 128 -book 9780140195682 0140195688 us Sitting: A Guide to Buddhist Meditation Sitting: A Guide to Buddhist Meditation Paperback Rituals & Practice $12.00 $2.90 4.5 Penguin (Non-Classics) 01-05-1998 03-02-2007 Diana St. Ruth 96 -book 9780060970796 0060970790 us The Man Who Mistook His Wife for a Hat: And Other Clinical Tales The Man Who Mistook His Wife for a Hat: And Other Clinical Tales Paperback Doctors & Medicine/ Self-Help & Psychology/ Clinical Psychology/ Neuropsychology/ Mental Illness $13.00 $0.95 4.5 In his most extraordinary book, "one of the great clinical writers of the 20th century" (The New York Times) recounts the case histories of patients lost in the bizarre, apparently inescapable world of neurological disorders. Oliver Sacks's The Man Who Mistook His Wife for a Hat tells the stories of individuals afflicted with fantastic perceptual and intellectual aberrations: patients who have lost their memories and with them the greater part of their pasts; who are no longer able to recognize people and common objects; who are stricken with violent tics and grimaces or who shout involuntary obscenities; whose limbs have become alien; who have been dismissed as retarded yet are gifted with uncanny artistic or mathematical talents./ If inconceivably strange, these brilliant tales remain, in Dr. Sacks's splendid and sympathetic telling, deeply human. They are studies of life struggling against incredible adversity, and they enable us to enter the world of the neurologically impaired, to imagine with our hearts what it must be to live and feel as they do.
A great healer, Sacks never loses sight of medicine's ultimate responsibility: "the suffering, afflicted, fighting human subject."/ / Harpercollins 01-02-1987 11-02-2007 Oliver W. Sacks 256 -book 9781590591253 1590591259 us Enterprise Java Development on a Budget: Leveraging Java Open Source Technologies Enterprise Java Development on a Budget: Leveraging Java Open Source Technologies Paperback Software Development/ Logic $49.99 $7.43 4.5 Open source has had a profound effect on the Java community. Many Java open source projects have even become de-facto standards. The principal purpose of Enterprise Java Development on a Budget is to guide you through the development of a real enterprise Java application using nothing but open source Java tools, projects, and frameworks./ This book is organized by activities and by particular open source projects that can help you take on the challenges of building the different tiers of your applications. The authors also present a realistic example application that covers most areas of enterprise application development. You'll find information on how to use and configure JBoss, Ant, XDoclet, Struts, ArgoUML, OJB, Hibernate, JUnit, SWT//JFace, and others. Not only will you learn how to use each individual tool, but you'll also understand how to use them in synergy to create robust enterprise Java applications within your budget./ Enterprise Java Development on a Budget combines coverage of best practices with information on the right open source Java tools and technologies, all of which will help support your Java development budget and goals./ Apress 10-11-2003 08-02-2007 5 1 Brian Sam-Bodden/ Christopher M. Judd 656 -book 9780976694069 0976694069 us Enterprise Integration with Ruby Enterprise Integration with Ruby Paperback/ Illustrated Assembly Language Programming/ Object-Oriented Design $32.95 $15.50 4.5 Typical enterprises use dozens, hundreds, and sometimes even thousands of applications, components, services, and databases. Many of them are custom built in-house or by third parties, some are bought, others are based on open source projects, and the origin of a few--usually the most critical ones--is completely unknown./ A lot of applications are very old, some are fairly new, and seemingly no two of them were written using the same tools. They run on heterogeneous operating systems and hardware, use databases and messaging systems from various vendors, and were written in different programming languages./ See how to glue these disparate applications together using popular technologies such as:/ • LDAP, Oracle, and MySQL/ • XML Documents and DTDs/ • Sockets, HTTP, and REST/ • XML//RPC, SOAP, and others/ • ...and more./ If you're on the hook to integrate enterprise-class systems together, the tips and techniques in this book will help./ Pragmatic Bookshelf 01-04-2006 08-02-2007 Maik Schmidt 330 -book 0639785413059 0071419837 us Teach Yourself Beginner's Chinese Script Teach Yourself Beginner's Chinese Script Paperback Chinese/ Phrasebooks - General/ Alphabet/ Reading Skills/ Writing Skills $10.95 $4.50 3 Now learning non-Roman-alphabet languages is as easy as A-B-C!/ Readers wanting to learn the basics of reading and writing a new language that employs script will find all they need in the Teach Yourself Beginner's Script series. Each book includes a step-by-step introduction to reading and writing in a new language as well as tips and practice exercises to build learners' skills. 
Thanks to the experts at Teach Yourself, script will no longer be all "Greek" to language learners--unless of course, it is Greek script! Teach Yourself Beginner's Script series books feature:/ • Origins of the language/ • A systematic approach to mastering the script/ • Lots of "hands-on" exercises and activities/ • Practical examples from real-life situations/ McGraw-Hill 06-06-2003 11-02-2007 Elizabeth Scurfield/ Song Lianyi 192 -book 9781590302835 1590302834 us Zen Training: Methods and Philosophy (Shambhala Classics) Zen Training: Methods and Philosophy (Shambhala Classics) Paperback Zen/ Meditation/ Buddha $16.95 $9.95 5 Zen Training is a comprehensive handbook for zazen, seated meditation practice, and an authoritative presentation of the Zen path. The book marked a turning point in Zen literature in its critical reevaluation of the enlightenment experience, which the author believes has often been emphasized at the expense of other important aspects of Zen training. In addition, Zen Training goes beyond the first flashes of enlightenment to explore how one lives as well as trains in Zen. The author also draws many significant parallels between Zen and Western philosophy and psychology, comparing traditional Zen concepts with the theories of being and cognition of such thinkers as Heidegger and Husserl. Shambhala 13-09-2005 06-02-2007 Katsuki Sekida 264 -book 9781583941454 1583941452 us Combat Techniques of Taiji, Xingyi, and Bagua: Principles and Practices of Internal Martial Arts Combat Techniques of Taiji, Xingyi, and Bagua: Principles and Practices of Internal Martial Arts Paperback Taichi $22.95 $13.20 3.5 The combat techniques of Tai Ji, Ba Gua, and Xing Yi were forbidden during China's Cultural Revolution, but the teachings of grandmaster Wang Pei Shing have survived. This comprehensive guide, written by one of his students, selects core movements from each practice and gives the student powerful tools to recognize the unique strategies and skills, and to develop a deeper understanding, of each style. It contains complete instructions for a 16-posture form to gain mastery of combat techniques. The book helps practitioners achieve a new level of practice, where deeply ingrained skills are brought forth in a more fluid, intuitive, and fast-paced fashion. Blue Snake Books//Frog, Ltd. 09-02-2006 11-02-2007 Lu Shengli 400 -book 0752063324547 0672324547 us HTTP Developer's Handbook HTTP Developer's Handbook Paperback HTML - General $39.99 $24.97 4.5 HTTP is the protocol that powers the Web. As Web applications become more sophisticated, and as emerging technologies continue to rely heavily on HTTP, understanding this protocol is becoming more and more essential for professional Web developers. By learning HTTP protocol, Web developers gain a deeper understanding of the Web's architecture and can create even better Web applications that are more reliable, faster, and more secure./ The HTTP Developer's Handbook is written specifically for Web developers. It begins by introducing the protocol and explaining it in a straightforward manner. It then illustrates how to leverage this information to improve applications. 
Extensive information and examples are given covering a wide variety of issues, such as state and session management, caching, SSL, software architecture, and application security./ Sams 19-03-2003 07-02-2007 Chris Shiflett 312 -book 9780517887943 0517887940 us Feng Shui Step by Step : Arranging Your Home for Health and Happiness--with Personalized Astrological Charts Feng Shui Step by Step : Arranging Your Home for Health and Happiness--with Personalized Astrological Charts Paperback Household Hints/ Psychology & Counseling/ Parapsychology/ Feng Shui $20.00 $0.01 4 Simons, a feng shui master and astrologer, teaches readers how to feng shui their homes in a clear, step-by-step fashion and gives personalized advice based on readers' dates of birth. Simons presents not only the popular eight-point method but also divining techniques and other authentic Chinese methods that make analysis more complete. Illustrations. Three Rivers Press 12-11-1996 07-02-2007 T. Raphael Simons 256 -book 9780380788620 0380788624 us Cryptonomicon Cryptonomicon Paperback United States/ Contemporary/ Literary/ Historical/ Spy Stories & Tales of Intrigue/ Technothrillers/ High Tech/ Paperback/ Action & Adventure $16.00 $1.95 4 Neal Stephenson enjoys cult status among science fiction fans and techie types thanks to Snow Crash, which so completely redefined conventional notions of the high-tech future that it became a self-fulfilling prophecy. But if his cyberpunk classic was big, Cryptonomicon is huge... gargantuan... massive, not just in size (a hefty 918 pages including appendices) but in scope and appeal. It's the hip, readable heir to Gravity's Rainbow and the Illuminatus trilogy. And it's only the first of a proposed series--for more information, read our interview with Stephenson./ Cryptonomicon zooms all over the world, careening conspiratorially back and forth between two time periods--World War II and the present. Our 1940s heroes are the brilliant mathematician Lawrence Waterhouse, cryptanalyst extraordinaire, and gung ho, morphine-addicted marine Bobby Shaftoe. They're part of Detachment 2702, an Allied group trying to break Axis communication codes while simultaneously preventing the enemy from figuring out that their codes have been broken. Their job boils down to layer upon layer of deception. Dr. Alan Turing is also a member of 2702, and he explains the unit's strange workings to Waterhouse. "When we want to sink a convoy, we send out an observation plane first.... Of course, to observe is not its real duty--we already know exactly where the convoy is. Its real duty is to be observed.... Then, when we come round and sink them, the Germans will not find it suspicious."/ All of this secrecy resonates in the present-day story line, in which the grandchildren of the WWII heroes--inimitable programming geek Randy Waterhouse and the lovely and powerful Amy Shaftoe--team up to help create an offshore data haven in Southeast Asia and maybe uncover some gold once destined for Nazi coffers. To top off the paranoiac tone of the book, the mysterious Enoch Root, key member of Detachment 2702 and the Societas Eruditorum, pops up with an unbreakable encryption scheme left over from WWII to befuddle the 1990s protagonists with conspiratorial ties./ Cryptonomicon is vintage Stephenson from start to finish: short on plot, but long on detail so precise it's exhausting. Every page has a math problem, a quotable in-joke, an amazing idea, or a bit of sharp prose. 
Cryptonomicon is also packed with truly weird characters, funky tech, and crypto--all the crypto you'll ever need, in fact, not to mention all the computer jargon of the moment. A word to the wise: if you read this book in one sitting, you may die of information overload (and starvation). --Therese Littleton/ Harper Perennial 02-05-2000 07-02-2007 Neal Stephenson 928 -book 9780887100260 0887100260 us Fifty-Five T'ang Poems: A Text in the Reading and Understanding of T'Ang Poetry (Far Eastern Publications Series) Fifty-Five T'ang Poems: A Text in the Reading and Understanding of T'Ang Poetry (Far Eastern Publications Series) Paperback Movements & Periods/ Anthologies/ Chinese $26.00 $24.95 3 Four masters of the shi form of Chinese poetry, who are generally considered to be giants in the entire history of Chinese literature, are represented in this book: three from the eighth century, and one from the ninth. A few works by other well-known poets are also included. The author provides a general background sketch, instruction to the student, a description of the phonological system and the spelling used, as well as an outline of the grammar of T'ang shi, insofar as it is known. Yale University Press 15-08-2006 11-02-2007 Hugh M. Stimson 256 -book 9780865681859 0865681856 us Xing Yi Quan Xue: The Study of Form-Mind Boxing Xing Yi Quan Xue: The Study of Form-Mind Boxing Paperback $21.95 $15.22 4.5 This is the first English language edition of Sun Lu Tang's 1915 classic on xing yi (hsing yi). In addition to the original text and photographs by Sun Lu Tang, a complete biography and additional photos of Master Sun have been added. Unique Publications 06-02-2000 06-02-2007 Sun Lu Tang 312 -book 9780596003425 0596003420 us Learning Unix for Mac OS X Learning Unix for Mac OS X Paperback MacOS/ Macintosh/ Macs/ Linux/ X Windows & Motif $19.95 $0.01 3 The success of Apple's operating system, Mac OS X, and its Unix roots has brought many new potential Unix users searching for information. The Terminal application and that empty command line can be daunting at first, but users understand it can bring them power and flexibility. Learning Unix for Mac OS X is a concise introduction to just what a reader needs to know to get started with Unix on Mac OS X. Many Mac users are familiar and comfortable with the easy-to-use elegance of the GUI. With Mac OS X, they now have the ability to not only continue to use their preferred platform, but to explore the powerful capabilities of Unix. Learning Unix for Mac OS X gives the reader information on how to use the Terminal application, become functional with the command interface, explore many Unix applications, and learn how to take advantage of the strengths of both interfaces./ The reader will find all the common commands simply explained with accompanying examples, exercises, and opportunities for experimentation. The book even includes problem checklists along the way to help the reader if they get stuck. The book begins with an introduction to the Unix environment to encourage the reader to get comfortable with the command line. The coverage then expands to launching and configuring the Terminal application--the heart of the Unix interface for the Mac OS X user. The text also introduces how to manage, create, edit, and transfer files. Most everyone using a computer today knows the importance of the internet. And Learning Unix for Mac OS X provides instruction on how to use functions such as mail, chat, and web browsing from the command line. 
A unique challenge for Mac OS X users is printing from the command line. The book contains an entire chapter on how to configure and utilize the various print functions./ The book has been reviewed by Apple for technological accuracy and brandishes the Apple Development Connection (ADC) logo./ O'Reilly 05-02-2002 07-02-2007 Dave Taylor/ Jerry Peek 156 -book 9780974514055 0974514055 us Programming Ruby: The Pragmatic Programmers' Guide, Second Edition Programming Ruby: The Pragmatic Programmers' Guide, Second Edition Paperback/ Illustrated Object-Oriented Design/ Qualifying Textbooks - Winter 2007 $44.95 $20.53 4.5 Ruby is an increasingly popular, fully object-oriented dynamic programming language, hailed by many practitioners as the finest and most useful language available today. When Ruby first burst onto the scene in the Western world, the Pragmatic Programmers were there with the definitive reference manual, Programming Ruby: The Pragmatic Programmer's Guide. Now in its second edition, author Dave Thomas has expanded the famous Pickaxe book with over 200 pages of new content, covering all the new and improved language features of Ruby 1.8 and standard library modules. The Pickaxe contains four major sections:/ • An acclaimed tutorial on using Ruby./ • The definitive reference to the language./ • Complete documentation on all built-in classes, modules, and methods/ • Complete descriptions of all 98 standard libraries./ If you enjoyed the First Edition, you'll appreciate the new and expanded content, including: enhanced coverage of installation, packaging, documenting Ruby source code, threading and synchronization, and enhancing Ruby's capabilities using C-language extensions. Programming for the world-wide web is easy in Ruby, with new chapters on XML//RPC, SOAP, distributed Ruby, templating systems and other web services. There's even a new chapter on unit testing. This is the definitive reference manual for Ruby, including a description of all the standard library modules, a complete reference to all built-in classes and modules (including more than 250 significant changes since the First Edition). Coverage of other features has grown tremendously, including details on how to harness the sophisticated capabilities of irb, so you can dynamically examine and experiment with your running code. "Ruby is a wonderfully powerful and useful language, and whenever I'm working with it this book is at my side" --Martin Fowler, Chief Scientist, ThoughtWorks Pragmatic Bookshelf 01-10-2004 07-02-2007 Dave Thomas/ Chad Fowler/ Andy Hunt 828 -book 9780974514000 0974514004 us Pragmatic Version Control Using CVS Pragmatic Version Control Using CVS Paperback/ Illustrated Software Development/ Information Systems/ Information Theory $29.95 $14.35 4 This book is a recipe-based approach to using the CVS Version Control system that will get you up and running quickly--and correctly. All projects need version control: it's a foundational piece of any project's infrastructure. Yet half of all project teams in the U.S. don't use any version control at all. Many others don't use it well, and end up experiencing time-consuming problems. Version Control, done well, is your "undo" button for the project: nothing is final, and mistakes are easily rolled back. With version control, you'll never again lose a good idea because someone overwrote your file edits. You can always find out easily who made what changes to the source code--and why. Version control is a project-wide time machine. 
Dial in a date and see exactly what the entire project looked like yesterday, last Tuesday, or even last year. This book describes a practical, easy-to-follow way of using CVS, the most commonly used version control system in the world (and it's available for free). Instead of presenting the grand Theory of Version Control and describing every possible option (whether you'd ever use it or not), this book focuses on the practical application of CVS. It builds a set of examples of use that parallel the life of typical projects, showing you how to adopt and then enhance your pragmatic use of CVS. With this book, you can:/ • Keep all project assets (not just source code) safe, and never run the risk of losing a great idea/ • Know how to undo bad decisions--no matter when they were made/ • Learn how to share code safely, and work in parallel for maximum efficiency/ • See how to avoid costly code freezes/ • Manage 3rd party code/ Now there's no excuse not to use professional-grade version control. The Pragmatic Programmers 09-02-2003 07-02-2007 David Thomas/ Andrew Hunt 161 -book 7805961006053 0596100604 us Astronomy Hacks Astronomy Hacks Paperback/ Illustrated Astronomy/ Star-Gazing $24.95 $13.93 5 Why use the traditional approach to study the stars when you can turn computers, handheld devices, and telescopes into out-of-this-world stargazing tools? Whether you're a first timer or an advanced hobbyist, you'll find Astronomy Hacks both useful and fun. From upgrading your optical finder to photographing stars, this book is the perfect cosmic companion. This handy field guide covers the basics of observing, and what you need to know about tweaking, tuning, adjusting, and tricking out a 'scope. Expect priceless tips and tools for using a Dobsonian Telescope, the large-aperture telescope you can inexpensively build in your garage. Get advice on protocols involved with using electronics, including in dark places, without ruining the party. Astronomy Hacks begins the space exploration by getting you set up with the right equipment for observing and admiring the stars in an urban setting. Along for the trip are first-rate tips for making the most of observations. The hacks show you how to:/ • Dark-Adapt Your Notebook Computer/ • Choose the Best Binocular/ • Clean Your Eyepieces and Lenses Safely/ • Upgrade Your Optical Finder/ • Photograph the Stars with Basic Equipment/ The O'Reilly Hacks series has reclaimed the term "hacking" to mean innovating, unearthing, and creating shortcuts, gizmos, and gears. With these hacks, you don't dream it-you do it--and Astronomy Hacks brings space dreams to life. The book is essential for anyone who wants to get the most out of an evening under the stars and have memorable celestial adventures. O'Reilly Media 17-06-2005 08-02-2007 Robert Bruce Thompson/ Barbara Fritchman Thompson 388 -book 9780877736752 0877736758 us The Tibetan Book of the Dead (Shambala Pocket Classics) The Tibetan Book of the Dead (Shambala Pocket Classics) Paperback Motivational/ Book of the Dead (Tibetan)/ Mysticism/ Eastern Philosophy/ Death/ Rituals & Practice $7.00 $2.56 4.5 In this classic scripture of Tibetan Buddhism—traditionally read aloud to the dying to help them attain liberation—death and rebirth are seen as a process that provides an opportunity to recognize the true nature of mind. This unabridged translation of The Tibetan Book of the Dead emphasizes the practical advice that the book offers to the living. 
The insightful commentary by Chögyam Trungpa, written in clear, concise language, explains what the text teaches us about human psychology. This book will be of interest to people concerned with death and dying, as well as those who seek greater spiritual understanding in everyday life. Shambhala 13-10-1992 11-02-2007 Chogyam Trungpa 236 -book 9781592400874 1592400876 us Eats, Shoots & Leaves: The Zero Tolerance Approach to Punctuation Eats, Shoots & Leaves: The Zero Tolerance Approach to Punctuation Hardcover Essays/ Grammar/ Reference/ Writing Skills $19.95 $1.84 4 A bona fide publishing phenomenon, Lynne Truss's now classic #1 New York Times bestseller Eats, Shoots & Leaves makes its paperback debut after selling over 3 million copies worldwide in hardcover./ We all know the basics of punctuation. Or do we? A look at most neighborhood signage tells a different story. Through sloppy usage and low standards on the Internet, in e-mail, and now text messages, we have made proper punctuation an endangered species./ In Eats, Shoots & Leaves, former editor Truss dares to say, in her delightfully urbane, witty, and very English way, that it is time to look at our commas and semicolons and see them as the wonderful and necessary things they are. This is a book for people who love punctuation and get upset when it is mishandled. From the invention of the question mark in the time of Charlemagne to George Orwell shunning the semicolon, this lively history makes a powerful case for the preservation of a system of printing conventions that is much too subtle to be mucked about with. BACKCOVER: Praise for Lynne Truss and Eats, Shoots & Leaves:

Eats, Shoots & Leaves “makes correct usage so cool that you have to admire Ms. Truss.”
—Janet Maslin, The New York Times

“Witty, smart, passionate.”
—Los Angeles Times Book Review, Best Books Of 2004: Nonfiction 

“Who knew grammar could be so much fun?”
—Newsweek

“Witty and instructive. . . . Truss is an entertaining, well-read scold in a culture that could use more scolding.”
—USA Today

“Truss is William Safire crossed with John Cleese's Basil Fawlty.”
—Entertainment Weekly

“Lynne Truss has done the English-speaking world a huge service.”
—The Christian Science Monitor

“This book changed my life in small, perfect ways like learning how to make better coffee or fold an omelet. It's the perfect gift for anyone who cares about grammar and a gentle introduction for those who don't care enough.”
—The Boston Sunday Globe

“Lynne Truss makes [punctuation] a joy to contemplate.”
—Elle

“If Lynne Truss were Roman Catholic I'd nominate her for sainthood.”
—Frank McCourt, author of Angela's Ashes

“Truss's scholarship is impressive and never dry.”
—Edmund Morris, The New York Times Book Review/ Gotham 12-04-2004 03-02-2007 Lynne Truss 209 -book 9780679724346 0679724346 us Tao Te Ching [Text Only] Tao Te Ching [Text Only] Paperback Taoism/ Tao Te Ching/ Chinese/ Bible & Other Sacred Texts $9.95 $2.88 4.5 Available for the first time in a handy, easy-to-use size, here is the most accessible and authoritative modern English translation of the ancient Chinese classic. This new Vintage edition includes an introduction and notes by the well-known writer and scholar of philosophy and comparative religion, Jacob Needleman. Vintage 28-08-1989 11-02-2007 Lao Tsu/ Jane English/ Jacob Needleman 144 -book 9780844285269 0844285269 us Easy Chinese Phrasebook & Dictionary Easy Chinese Phrasebook & Dictionary Paperback English (All)/ English (British)/ Chinese/ Conversation/ Phrasebooks - General/ Southeast Asian/ Linguistics $12.95 $3.00 4.5 Two books in one—a practical phrasebook of essential Chinese vocabulary and expressions, plus a 3,500 English//Chinese dictionary. McGraw-Hill 07-12-1990 11-02-2007 Yeou-Koung Tung 272 -book 9780877738510 0877738513 us Art of Peace (Shambhala Pocket Classics) Art of Peace (Shambhala Pocket Classics) Paperback New Age/ Eastern Philosophy/ Aikido/ Martial Arts $6.95 $2.59 4.5 These inspirational teachings show that the real way of the warrior is based on compassion, wisdom, fearlessness, and love of nature. Drawn from the talks and writings of Morihei Ueshiba, founder of the popular Japanese martial art known as Aikido, The Art of Peace, presented here in a pocket-sized edition, offers a nonviolent way to victory and a convincing counterpoint to such classics as Musashi's Book of Five Rings and Sun Tzu's Art of War. Shambhala 10-11-1992 11-02-2007 Morihei Ueshiba/ John Stevens 126 -book 9780912111476 091211147X us Learn to Read Chinese: An Introduction to the Language and Concepts of Current Zhongyi Literature (Learn to Read Chinese) Learn to Read Chinese: An Introduction to the Language and Concepts of Current Zhongyi Literature (Learn to Read Chinese) Paperback Acupuncture & Acupressure/ Chinese/ Southeast Asian/ Linguistics/ Chinese Medicine $30.00 $17.50 These two volumes teach the language of contemporary Chinese technical literature. The subject matter is Chinese medicine, making these texts ideal for those who wish to learn Chinese from real-world sources. All 128 of the texts chosen are excerpted from the introduction to Chinese medicine written by Qin Bowei, one of the founders of TCM and a medical writer known for his clear, precise and detailed clinical expression. The work is thus a superb supplement for students of Chinese and an effective course of study for clinicians or scholars who wish to read Chinese technical periodicals, papers and texts./ The first volume teaches vocabulary. Each text is an exercise; readers transliterate, then translate a passage based on the simplified character vocabulary provided with each passage and its preceding passages. A completed transliteration in Pinyin and a finished English translation accompany the Chinese. The subject matter provides an exposure to authentic contemporary discussions of the fundamental principles of Chinese medicine./ The second volume teaches analysis of Chinese texts through the principles of Natural Language development. 
By showing how to identify the basic statement in a sentence and the adjunct statements that complete its meaning, just as children learn to read their native language, the reader is given access to Chinese texts as quickly as is possible. When the course is completed, users are working with typical modern Chinese medical sources./ Paradigm Publications (MA) 09-02-1994 11-02-2007 Paul U. Unschuld 2 -book 9780521777681 0521777682 us The Elements of Java Style The Elements of Java Style Paperback Object-Oriented Design/ Software Development/ Computers & Internet $14.99 $2.98 4 The Elements of Java Style, written by renowned author Scott Ambler, Alan Vermeulen, and a team of programmers from Rogue Wave Software, is directed at anyone who writes Java code. Many books explain the syntax and basic use of Java; however, this essential guide explains not only what you can do with the syntax, but what you ought to do. Just as Strunk and White's The Elements of Style provides rules of usage for the English language, this text furnishes a set of rules for Java practitioners. While illustrating these rules with parallel examples of correct and incorrect usage, the authors offer a collection of standards, conventions, and guidelines for writing solid Java code that will be easy to understand, maintain, and enhance. Java developers and programmers who read this book will write better Java code, and become more productive as well. Indeed, anyone who writes Java code or plans to learn how to write Java code should have this book next to his//her computer. Cambridge University Press 01-02-2000 07-02-2007 Allan Vermeulen/ Scott W. Ambler/ Greg Bumgardner/ Eldon Metz/ Trevor Misfeldt/ Jim Shur/ Alan Vermeulen/ Patrick Thompson 125 -book 0723812607006 0471463620 us Java Open Source Programming: with XDoclet, JUnit, WebWork, Hibernate Java Open Source Programming: with XDoclet, JUnit, WebWork, Hibernate Paperback Software Development/ Computers & Internet $45.00 $1.65 4 The Java language itself is not strictly open-source (Sun has held onto control, albeit with lots of public input). There is, however, a large open-source development community around this highly capable language. Java Open Source Programming describes and provides tutorials on some of the most interesting public Java projects, and is designed to enable a Java programmer (who's worked through the basic language's initial learning curve) to take on more ambitious assignments. The authors generally treat the covered open-source packages as resources to be used, rather than projects to be contributed to, and so it's fair to think of this volume as the "missing manual" for downloaded code. In that spirit, the authors devote many sections to "how to" subjects (addressing, for example, a good way to retrieve stored objects from a database and the procedure for calling an action in XWork)./ Java Open Source Programming takes a bit of a risk by devoting a lot of space to the development of a complex application (an online pet shop), as such a didactic strategy can be hard to follow. The authors pull it off, though, and manage to show that their covered technologies can be used to create a feature-rich and robust application that uses the versatile model-view-controller (MVC) pattern. This book will suit you well if you're planning an MVC Java project and want to take advantage of open-source packages. 
--David Wall/ Topics covered: The most popular open-source Java packages, particularly those concerned with Web applications and the model-view-controller (MVC) pattern. Specific packages covered include JUnit and Mocks (code testing), Hibernate (persistent storage of objects in databases), WebWork (MVC), SiteMesh (Web page layout), Lucene (site searching), and WebDoclet (configuration file generation)./ Wiley 28-11-2003 07-02-2007 Joseph Walnes/ Ara Abrahamian/ Mike Cannon-Brookes/ Patrick A. Lightbody 480 -book 9780201082593 0201082594 us Artificial Intelligence Edition (Addison-Wesley series in computer science) Artificial Intelligence Edition (Addison-Wesley series in computer science) Paperback Questions & Answers/ Information Systems $32.61 $0.01 3.5 This book is one of the oldest and most popular introductions to artificial intelligence. An accomplished artificial intelligence (AI) scientist, Winston heads MIT's Artificial Intelligence Laboratory, and his hands-on AI research experience lends authority to what he writes. Winston provides detailed pseudo-code for most of the algorithms discussed, so you will be able to implement and test the algorithms immediately. The book contains exercises to test your knowledge of the subject and helpful introductions and summaries to guide you through the material. Addison-wesley 04-02-1984 08-02-2007 Patrick Henry Winston 527 -book 9781558605701 1558605703 us Managing Gigabytes: Compressing and Indexing Documents and Images (The Morgan Kaufmann Series in Multimedia and Information Systems) Managing Gigabytes: Compressing and Indexing Documents and Images (The Morgan Kaufmann Series in Multimedia and Information Systems) Hardcover Storage/ Compression/ Software Development/ Software Project Management/ Electronic Documents/ General & Reference/ Engineering/ Peripherals/ Digital Image Processing/ Qualifying Textbooks - Winter 2007 $74.95 $49.98 4.5 Of all the tasks programmers are asked to perform, storing, compressing, and retrieving information are some of the most challenging--and critical to many applications. Managing Gigabytes: Compressing and Indexing Documents and Images is a treasure trove of theory, practical illustration, and general discussion in this fascinating technical subject./ Ian Witten, Alistair Moffat, and Timothy Bell have updated their original work with this even more impressive second edition. This version adds recent techniques such as block-sorting, new indexing techniques, new lossless compression strategies, and many other elements to the mix. In short, this work is a comprehensive summary of text and image compression, indexing, and querying techniques. The history of relevant algorithm development is woven well with a practical discussion of challenges, pitfalls, and specific solutions./ This title is a textbook-style exposition on the topic, with its information organized very clearly into topics such as compression, indexing, and so forth. In addition to diagrams and example text transformations, the authors use "pseudo-code" to present algorithms in a language-independent manner wherever possible. They also supplement the reading with mg--their own implementation of the techniques. The mg C language source code is freely available on the Web./ Alone, this book is an impressive collection of information. Nevertheless, the authors list numerous titles for further reading in selected topics. 
Whether you're in the midst of application development and need solutions fast or are merely curious about how top-notch information management is done, this hardcover is an excellent investment. --Stephen W. Plain/ Topics covered: Text compression models, including Huffman, LZW, and their variants; trends in information management; index creation and compression; image compression; performance issues; and overall system implementation./ Morgan Kaufmann 15-05-1999 07-02-2007 Ian H. Witten/ Alistair Moffat/ Timothy C. Bell 519 -book 9780596101190 0596101198 us Open Source for the Enterprise: Managing Risks Reaping Rewards Open Source for the Enterprise: Managing Risks Reaping Rewards Paperback/ Illustrated Technical Support/ Programming/ Risks/ Linux $22.95 $12.50 5 Open source software is changing the world of Information Technology. But making it work for your company is far more complicated than simply installing a copy of Linux. If you are serious about using open source to cut costs, accelerate development, and reduce vendor lock-in, you must institutionalize skills and create new ways of working. You must understand how open source is different from commercial software and what responsibilities and risks it brings. Open Source for the Enterprise is a sober guide to putting open source to work in the modern IT department./ Open source software is software whose code is freely available to anyone who wants to change and redistribute it. New commercial support services, smaller licensing fees, increased collaboration, and a friendlier platform to sell products and services are just a few of the reasons open source is so attractive to IT departments. Some of the open source projects that are in current, widespread use in businesses large and small include Linux, FreeBSD, Apache, MySQL, PostgreSQL, JBOSS, and Perl. These have been used to such great effect by Google, Amazon, Yahoo!, and major commercial and financial firms, that a wave of publicity has resulted in recent years, bordering on hype. Large vendors such as IBM, Novell, and Hewlett Packard have made open source a lynchpin of their offerings. Open source has entered a new area where it is being used as a marketing device, a collaborative software development methodology, and a business model./ This book provides something far more valuable than either the cheerleading or the fear-mongering one hears about open source. The authors are Dan Woods, former CTO of TheStreet.com and a consultant and author of several books about IT, and Gautam Guliani, Director of Software Architecture at Kaplan Test Prep & Admissions. Each has used open source software for some 15 years at IT departments large and small. They have collected the wisdom of a host of experts from IT departments, open source communities, and software companies./ Open Source for the Enterprise provides a top to bottom view not only of the technology, but of the skills required to manage it and the organizational issues that must be addressed. 
Here are the sorts of questions answered in the book:/ • Why is there a "productization gap" in most open source projects?/ • How can the maturity of open source be evaluated?/ • How can the ROI of open source be calculated?/ • What skills are needed to use open source?/ • What sorts of open source projects are appropriate for IT departments at the beginner, intermediate, advanced, and expert levels?/ • What questions need to be answered by an open source strategy?/ • What policies for governance can be instituted to control the adoption of open source?/ • What new commercial services can help manage the risks of open source?/ • Do differences in open source licenses matter?/ • How will using open source transform an IT department?/ Praise for Open Source for the Enterprise: "Open Source has become a strategic business issue; decisions on how and where to choose to use Open Source now have a major impact on the overall direction of IT abilities to support the business both with capabilities and by controlling costs. This is a new game and one generally not covered in existing books on Open Source which continue to assume that the readers are 'deep dive' technologists, Open Source for the Enterprise provides everyone from business managers to technologists with the balanced view that has been missing. Well worth the time to read, and also worth encouraging others in your enterprise to read as well." ----Andy Mulholland - Global CTO Capgemini/ "Open Source for the Enterprise is required reading for anyone working with or looking to adopt open source technologies in a corporate environment. Its practical, no-BS approach will make sure you're armed with the information you need to deploy applications successfully (as well as helping you know when to say "no"). If you're trying to sell open source to management, this book will give you the ammunition you need. If you're a manager trying to drive down cost using open source, this book will tell you what questions to ask your staff. In short, it's a clear, concise explanation of how to successfully leverage open source without making the big mistakes that can get you fired." ----Kevin Bedell - founding editor of LinuxWorld Magazine/ O'Reilly Media 27-07-2005 08-02-2007 Dan Woods/ Gautam Guliani 217 -movie 0786936259223 B00030590I us Hero Hero DVD China/ Leung Chiu Wai, Tony/ Jet Li $19.99 $5.49 4 Master filmmaker Quentin Tarantino presents HERO -- starring martial arts legend Jet Li in a visually stunning martial arts epic where a fearless warrior rises up to defy an empire and unite a nation! With supernatural skill ... and no fear ... a nameless soldier (Jet Li) embarks on a mission of revenge against the fearsome army that massacred his people. Now, to achieve the justice he seeks, he must take on the empire's most ruthless assassins and reach the enemy he has sworn to defeat! Acclaimed by critics and honored with numerous awards, HERO was nominated for both an Oscar® (2002 Best Foreign Language Film) and a Golden Globe! 
Miramax 30-11-2004 07-02-2007 5 Yimou Zhang Jet Li/ Tony Leung Chiu Wai/ Maggie Cheung/ Ziyi Zhang/ Daoming Chen/ Donnie Yen/ Liu Zhong Yuan/ Zheng Tia Yong/ Yan Qin/ Chang Xiao Yang/ Zhang Ya Kun/ Ma Wen Hua/ Jin Ming/ Xu Kuang Hua/ Wang Shou Xin/ Hei Zi/ Cao Hua/ Li Lei/ Xia Bin/ Peng Qiang/ Zhang Yimou Closed-captioned/ Color/ Dolby/ Dubbed/ Subtitled/ Widescreen/ NTSC/ 2.35:1 PG-13 27-08-2004 99 -book 0752064712350 0735712352 us Cocoon: Building XML Applications Cocoon: Building XML Applications Paperback Software Development/ HTML - General/ XML/ Combinatorics/ jp-unknown1 $39.99 $2.30 4 Cocoon: Building XML Applications is the guide to the Apache Cocoon project. The book contains the much needed documentation on the Cocoon project, but it does not limit itself to just being a developer's handbook. The book motivates the use of XML and XML software (in particular open source software). It contains everything a beginner needs to get going with Cocoon as well as the detailed information a developer needs to develop new and exciting components to extend the XML publishing framework. Although each chapter builds upon the previous ones, the book is designed so that the chapters can also be read as individual guides to the topics they discuss. Varied "hands-on" examples are used to make the underlying concepts and technologies absolutely clear to anyone starting out with Cocoon. Chapters that detail the author's experience in building Internet applications are used to embed Cocoon into the "real world" and complete the picture. [md]Matthew Langham and Carsten Ziegeler Sams 24-07-2002 07-02-2007 Carsten Ziegeler/ Matthew Langham 504 \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/examples/marc/marc_importer.rb solr-3.3/client/ruby/solr-ruby/examples/marc/marc_importer.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/examples/marc/marc_importer.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/examples/marc/marc_importer.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,106 +0,0 @@ -#!/usr/bin/env ruby -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -require 'marc' -require 'solr' - -solr_url = ENV["SOLR_URL"] || "http://localhost:8983/solr" -marc_filename = ARGV[0] -file_number = marc_filename.scan(/\d\d/) -debug = ARGV[1] == "-debug" - -$KCODE = 'UTF8' - -mapping = { - # :solr_field_name => String - # :solr_field_name => Array of Strings - # :solr_field_name => Proc [Proc operates on record] - # String = 3 digit control field number or 3 digit data field number + subfield letter - - :id => '001', - :subject_genre_facet => ['600v', '610v', '611v', '650v', '651v', '655a'], - :subject_era_facet => ['650d', '650y', '651y', '655y'], - :subject_topic_facet => ['650a', '650b', '650x'], - :subject_geographic_facet => ['650c', '650z', '651a', '651x', '651z', '655z'], - :year_facet => Proc.new do |r| - extract_record_data(r,'260c').collect {|f| f.scan(/\d\d\d\d/)}.flatten - end, - :title_text => '245a', - :author_text => '100a', - :call_number_text => '050a', - :isbn_text => '010a', - :filename_facet => Proc.new {|r| file_number}, -} - -connection = Solr::Connection.new(solr_url) - -if marc_filename =~ /.gz$/ - puts "Unzipping data file..." - temp_filename = "/tmp/marc_data_#{file_number}.mrc" - system("cp #{marc_filename} #{temp_filename}.gz") - system("gunzip #{temp_filename}") - marc_filename = temp_filename -end - -reader = MARC::Reader.new(marc_filename) -count = 0 - -def extract_record_data(record, fields) - extracted_data = [] - - fields.each do |field| - tag = field[0,3] - - extracted_fields = record.find_all {|f| f.tag === tag} - - extracted_fields.each do |field_instance| - if tag < '010' # control field - extracted_data << field_instance.value rescue nil - else # data field - subfield = field[3].chr - extracted_data << field_instance[subfield] rescue nil - end - end - end - - extracted_data.compact.uniq -end - -puts "Indexing #{marc_filename}..." -for record in reader - doc = {} - mapping.each do |key,value| - data = nil - case value - when Proc - data = value.call(record) - - when String, Array - data = extract_record_data(record, value) - data = nil if data.empty? - end - - doc[key] = data if data - end - - puts doc.inspect,"------" if debug - - connection.send(Solr::Request::AddDocument.new(doc)) unless debug - - count += 1 - - puts count if count % 100 == 0 -end - -connection.send(Solr::Request::Commit.new) unless debug -puts "Done" diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/examples/tang/tang_importer.rb solr-3.3/client/ruby/solr-ruby/examples/tang/tang_importer.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/examples/tang/tang_importer.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/examples/tang/tang_importer.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
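The removed marc_importer.rb above drives indexing entirely from a declarative mapping hash: each key is a Solr field name, and each value is either a control/data-field spec string ('001', '245a'), an array of such specs, or a Proc called with the whole record. A minimal, self-contained sketch of that dispatch logic (a plain hash stands in for a MARC::Record here, and the field names are illustrative, not a fixed schema):

# Toy "record" standing in for a MARC::Record, so the sketch runs without the marc gem.
record = { '001' => 'ocm123', '245a' => 'Cryptonomicon', '260c' => 'c1999.' }

mapping = {
  :id         => '001',                                        # spec string: value copied verbatim
  :title_text => '245a',                                       # data field 245, subfield a
  :year_facet => Proc.new { |r| r['260c'].to_s.scan(/\d{4}/) } # Proc: computed from the record
}

doc = {}
mapping.each do |solr_field, spec|
  data = spec.is_a?(Proc) ? spec.call(record) : record[spec]
  doc[solr_field] = data if data
end
# doc => {:id=>"ocm123", :title_text=>"Cryptonomicon", :year_facet=>["1999"]}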
- -require 'hpricot' -require 'solr' - -solr_url = ENV["SOLR_URL"] || "http://localhost:8983/solr" -debug = ARGV[1] == "-debug" - -solr = Solr::Connection.new(solr_url) - -html = Hpricot(open(ARGV[0])) -max = 320 - -def next_blockquote(elem) - elem = elem.next_sibling - until elem.name == "blockquote" do - elem = elem.next_sibling - end - - elem -end - -for current_index in (1..max) do - section_start = html.at("//blockquote[text()='#{format('%03d',current_index)}']") - type_zh = next_blockquote(section_start) - author_zh = next_blockquote(type_zh) - title_zh = next_blockquote(author_zh) - body_zh = next_blockquote(title_zh) - - type_en = next_blockquote(body_zh) - author_en = next_blockquote(type_en) - title_en = next_blockquote(author_en) - body_en = next_blockquote(title_en) - doc = {:type_zh_facet => type_zh, :author_zh_facet => author_zh, :title_zh_text => title_zh, :body_zh_text => body_zh, - :type_en_facet => type_en, :author_en_facet => author_en, :title_en_text => title_en, :body_en_text => body_en - } - doc.each {|k,v| doc[k] = v.inner_text} - doc[:id] = current_index # TODO: namespace the id, something like "etext_tang:#{current_index}" - doc[:source_facet] = 'etext_tang' - doc[:language_facet] = ['chi','eng'] - - puts "----",doc[:id],doc[:title_en_text],doc[:author_en_facet],doc[:type_en_facet] -# puts doc.inspect if debug - solr.add doc unless debug -end - -solr.commit unless debug -#solr.optimize unless debug \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/connection.rb solr-3.3/client/ruby/solr-ruby/lib/solr/connection.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/connection.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/connection.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,179 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'net/http' - -# TODO: add a convenience method to POST a Solr .xml file, like Solr's example post.sh - -class Solr::Connection - attr_reader :url, :autocommit, :connection - - # create a connection to a solr instance using the url for the solr - # application context: - # - # conn = Solr::Connection.new("http://example.com:8080/solr") - # - # if you would prefer to have all adds/updates autocommitted, - # use :autocommit => :on - # - # conn = Solr::Connection.new('http://example.com:8080/solr', - # :autocommit => :on) - - def initialize(url="http://localhost:8983/solr", opts={}) - @url = URI.parse(url) - unless @url.kind_of? URI::HTTP - raise "invalid http url: #{url}" - end - - # TODO: Autocommit seems nice at one level, but it currently is confusing because - # only calls to Connection#add/#update/#delete, though a Connection#send(AddDocument.new(...)) - # does not autocommit. Maybe #send should check for the request types that require a commit and - # commit in #send instead of the individual methods? - @autocommit = opts[:autocommit] == :on - - # Not actually opening the connection yet, just setting up the persistent connection. 
- @connection = Net::HTTP.new(@url.host, @url.port) - - @connection.read_timeout = opts[:timeout] if opts[:timeout] - end - - # add a document to the index. you can pass in either a hash - # - # conn.add(:id => 123, :title => 'Tlon, Uqbar, Orbis Tertius') - # - # or a Solr::Document - # - # conn.add(Solr::Document.new(:id => 123, :title => 'On Writing')) - # - # true/false will be returned to designate success/failure - - def add(doc) - request = Solr::Request::AddDocument.new(doc) - response = send(request) - commit if @autocommit - return response.ok? - end - - # update a document in the index (really just an alias to add) - - def update(doc) - return add(doc) - end - - # performs a standard query and returns a Solr::Response::Standard - # - # response = conn.query('borges') - # - # alternatively you can pass in a block and iterate over hits - # - # conn.query('borges') do |hit| - # puts hit - # end - # - # options include: - # - # :sort, :default_field, :rows, :filter_queries, :debug_query, - # :explain_other, :facets, :highlighting, :mlt, - # :operator => :or / :and - # :start => defaults to 0 - # :field_list => array, defaults to ["*", "score"] - - def query(query, options={}, &action) - # TODO: Shouldn't this return an exception if the Solr status is not ok? (rather than true/false). - create_and_send_query(Solr::Request::Standard, options.update(:query => query), &action) - end - - # performs a dismax search and returns a Solr::Response::Standard - # - # response = conn.search('borges') - # - # options are same as query, but also include: - # - # :tie_breaker, :query_fields, :minimum_match, :phrase_fields, - # :phrase_slop, :boost_query, :boost_functions - - def search(query, options={}, &action) - create_and_send_query(Solr::Request::Dismax, options.update(:query => query), &action) - end - - # sends a commit message to the server - def commit(options={}) - response = send(Solr::Request::Commit.new(options)) - return response.ok? - end - - # sends an optimize message to the server - def optimize - response = send(Solr::Request::Optimize.new) - return response.ok? - end - - # pings the connection and returns true/false if it is alive or not - def ping - begin - response = send(Solr::Request::Ping.new) - return response.ok? - rescue - return false - end - end - - # delete a document from the index using the document id - def delete(document_id) - response = send(Solr::Request::Delete.new(:id => document_id)) - commit if @autocommit - response.ok? - end - - # delete using a query - def delete_by_query(query) - response = send(Solr::Request::Delete.new(:query => query)) - commit if @autocommit - response.ok? - end - - def info - send(Solr::Request::IndexInfo.new) - end - - # send a given Solr::Request and return a RubyResponse or XmlResponse - # depending on the type of request - def send(request) - data = post(request) - Solr::Response::Base.make_response(request, data) - end - - # send the http post request to solr; for convenience there are shortcuts - # to some requests: add(), query(), commit(), delete() or send() - def post(request) - response = @connection.post(@url.path + "/" + request.handler, - request.to_s, - { "Content-Type" => request.content_type }) - - case response - when Net::HTTPSuccess then response.body - else - response.error! 
- end - - end - -private - - def create_and_send_query(klass, options = {}, &action) - request = klass.new(options) - response = send(request) - return response unless action - response.each {|hit| action.call(hit)} - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/document.rb solr-3.3/client/ruby/solr-ruby/lib/solr/document.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/document.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/document.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr/xml' -require 'solr/field' - -class Solr::Document - include Enumerable - attr_accessor :boost - - # Create a new Solr::Document, optionally passing in a hash of - # key/value pairs for the fields - # - # doc = Solr::Document.new(:creator => 'Jorge Luis Borges') - def initialize(hash={}) - @fields = [] - self << hash - end - - # Append a Solr::Field - # - # doc << Solr::Field.new(:creator => 'Jorge Luis Borges') - # - # If you are truly lazy you can simply pass in a hash: - # - # doc << {:creator => 'Jorge Luis Borges'} - def <<(fields) - case fields - when Hash - fields.each_pair do |name,value| - if value.respond_to?(:each) && !value.is_a?(String) - value.each {|v| @fields << Solr::Field.new(name => v)} - else - @fields << Solr::Field.new(name => value) - end - end - when Solr::Field - @fields << fields - else - raise "must pass in Solr::Field or Hash" - end - end - - # shorthand to allow hash lookups - # doc['name'] - def [](name) - field = @fields.find {|f| f.name == name.to_s} - return field.value if field - return nil - end - - # shorthand to assign as a hash - def []=(name,value) - @fields << Solr::Field.new(name => value) - end - - # convert the Document to a REXML::Element - def to_xml - e = Solr::XML::Element.new 'doc' - e.attributes['boost'] = @boost.to_s if @boost - @fields.each {|f| e.add_element(f.to_xml)} - return e - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/exception.rb solr-3.3/client/ruby/solr-ruby/lib/solr/exception.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/exception.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/exception.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
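Taken together, the removed connection.rb and document.rb define the whole indexing round trip: build a Connection, add hashes or Solr::Document objects, commit, query. A short usage sketch based on the comments in those files (it assumes the solr-ruby gem and a Solr server on the default example port):

require 'solr'

conn = Solr::Connection.new('http://localhost:8983/solr', :autocommit => :on)

# add accepts a plain hash ...
conn.add(:id => 123, :title_text => 'Tlon, Uqbar, Orbis Tertius')

# ... or a Solr::Document, whose array values expand to one Solr::Field each
doc = Solr::Document.new(:id => 124, :author_text => ['Borges', 'Casares'])
conn.add(doc)

# query yields each hit to the block and returns a Solr::Response::Standard
conn.query('tlon', :rows => 10) do |hit|
  puts hit.inspect
end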
- -class Solr::Exception < Exception; end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/field.rb solr-3.3/client/ruby/solr-ruby/lib/solr/field.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/field.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/field.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr/xml' -require 'time' - -class Solr::Field - VALID_PARAMS = [:boost] - attr_accessor :name - attr_accessor :value - attr_accessor :boost - - # Accepts an optional :boost parameter, used to boost the relevance of a particular field. - def initialize(params) - @boost = params[:boost] - name_key = (params.keys - VALID_PARAMS).first - @name, @value = name_key.to_s, params[name_key] - # Convert any Time values into UTC/XML schema format (which Solr requires). - @value = @value.respond_to?(:utc) ? @value.utc.xmlschema : @value.to_s - end - - def to_xml - e = Solr::XML::Element.new 'field' - e.attributes['name'] = @name - e.attributes['boost'] = @boost.to_s if @boost - e.text = @value - return e - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/array_mapper.rb solr-3.3/client/ruby/solr-ruby/lib/solr/importer/array_mapper.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/array_mapper.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/importer/array_mapper.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - - -class Solr::Importer::ArrayMapper < Solr::Importer::Mapper - # TODO document that initializer takes an array of Mappers [mapper1, mapper2, ... mapperN] - - # TODO: make merge conflict handling configurable. as is, the last map fields win. 
- def map(orig_data_array) - mapped_data = {} - orig_data_array.each_with_index do |data,i| - mapped_data.merge!(@mapping[i].map(data)) - end - mapped_data - end -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/delimited_file_source.rb solr-3.3/client/ruby/solr-ruby/lib/solr/importer/delimited_file_source.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/delimited_file_source.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/importer/delimited_file_source.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# For files with the first line containing field names -# Currently not designed for enormous files, as all lines are -# read into an array -class Solr::Importer::DelimitedFileSource - include Enumerable - - def initialize(filename, splitter=/\t/) - @filename = filename - @splitter = splitter - end - - def each - lines = IO.readlines(@filename) - headers = lines[0].split(@splitter).collect{|h| h.chomp} - - lines[1..-1].each do |line| - data = headers.zip(line.split(@splitter).collect{|s| s.chomp}) - def data.[](key) - self.assoc(key.to_s)[1] - end - - yield(data) - end - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/hpricot_mapper.rb solr-3.3/client/ruby/solr-ruby/lib/solr/importer/hpricot_mapper.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/hpricot_mapper.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/importer/hpricot_mapper.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -begin - require 'hpricot' - - class Solr::Importer::HpricotMapper < Solr::Importer::Mapper - def field_data(doc, path) - doc.search(path.to_s).collect { |e| e.inner_html } - end - end -rescue LoadError => e # If we can't load hpricot - class Solr::Importer::HpricotMapper - def initialize(mapping, options={}) - raise "Hpricot not installed." 
- end - end -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/mapper.rb solr-3.3/client/ruby/solr-ruby/lib/solr/importer/mapper.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/mapper.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/importer/mapper.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Importer::Mapper - def initialize(mapping, options={}) - @mapping = mapping - @options = options - end - - def field_data(orig_data, field_name) - orig_data[field_name] - end - - def mapped_field_value(orig_data, field_mapping) - case field_mapping - when String - field_mapping - when Proc - field_mapping.call(orig_data) # TODO pass in more context, like self or a function for field_data, etc - when Symbol - field_data(orig_data, @options[:stringify_symbols] ? field_mapping.to_s : field_mapping) - when Enumerable - field_mapping.collect {|orig_field_name| mapped_field_value(orig_data, orig_field_name)}.flatten - else - raise "Unknown mapping for #{field_mapping}" - end - end - - def map(orig_data) - mapped_data = {} - @mapping.each do |solr_name, field_mapping| - value = mapped_field_value(orig_data, field_mapping) - mapped_data[solr_name] = value if value - end - - mapped_data - end - - - - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/solr_source.rb solr-3.3/client/ruby/solr-ruby/lib/solr/importer/solr_source.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/solr_source.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/importer/solr_source.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
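A hedged usage sketch of SolrSource, which pages query results out of an existing index so they can be re-fed to an indexer (the URL, query, and options below are illustrative assumptions):

    require 'solr'

    # Stream id/title from every matching document, 500 docs per HTTP request.
    source = Solr::Importer::SolrSource.new('http://localhost:8983/solr', '*:*', nil,
                                            :page_size  => 500,
                                            :field_list => ['id', 'title'])
    source.each { |doc| puts doc['id'] }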
- -require 'solr' - -class Solr::Importer::SolrSource - def initialize(solr_url, query, filter_queries=nil, options={}) - @connection = Solr::Connection.new(solr_url) - @query = query - @filter_queries = filter_queries - - @page_size = options[:page_size] || 1000 - @field_list = options[:field_list] || ["*"] - end - - def each - done = false - start = 0 - until done do - # request N documents from a starting point - request = Solr::Request::Standard.new(:query => @query, - :rows => @page_size, - :start => start, - :field_list => @field_list, - :filter_queries => @filter_queries) - response = @connection.send(request) - response.each do |doc| - yield doc # TODO: perhaps convert to HashWithIndifferentAccess.new(doc), so stringify_keys isn't necessary - end - done = start + @page_size >= response.total_hits - start = start + @page_size - end - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/xpath_mapper.rb solr-3.3/client/ruby/solr-ruby/lib/solr/importer/xpath_mapper.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer/xpath_mapper.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/importer/xpath_mapper.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -begin - require 'xml/libxml' - - # For files with the first line containing field names - class Solr::Importer::XPathMapper < Solr::Importer::Mapper - def field_data(doc, xpath) - doc.find(xpath.to_s).collect do |node| - case node - when XML::Attr - node.value - when XML::Node - node.content - end - end - end - end -rescue LoadError => e # If we can't load libxml - class Solr::Importer::XPathMapper - def initialize(mapping, options={}) - raise "libxml not installed" - end - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer.rb solr-3.3/client/ruby/solr-ruby/lib/solr/importer.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/importer.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/importer.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
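The importer module wires a source, a mapper, and Solr::Indexer into a pipeline. A hedged end-to-end sketch, assuming a tab-delimited books.tsv (an invented file) whose header row names the columns "isbn" and "title":

    require 'solr'

    source  = Solr::Importer::DelimitedFileSource.new('books.tsv')
    mapping = { :id      => :isbn,     # a Symbol is looked up in each row by name
                :title_t => :title,
                :label_t => Proc.new { |row| "#{row['title']} (#{row['isbn']})" } }
    Solr::Indexer.new(source, mapping, :debug => true).index  # :debug prints docs instead of posting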
- -module Solr; module Importer; end; end -require 'solr/importer/mapper' -require 'solr/importer/array_mapper' -require 'solr/importer/delimited_file_source' -require 'solr/importer/hpricot_mapper' -require 'solr/importer/xpath_mapper' -require 'solr/importer/solr_source' \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/indexer.rb solr-3.3/client/ruby/solr-ruby/lib/solr/indexer.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/indexer.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/indexer.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Indexer - attr_reader :solr - - # TODO: document options! - def initialize(data_source, mapper_or_mapping, options={}) - solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr" - @solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed - - @data_source = data_source - @mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping - - @buffer_docs = options[:buffer_docs] - @debug = options[:debug] - end - - def index - buffer = [] - @data_source.each do |record| - document = @mapper.map(record) - - # TODO: check arity of block, if 3, pass counter as 3rd argument - yield(record, document) if block_given? # TODO check return of block, if not true then don't index, or perhaps if document.empty? - - buffer << document - - if !@buffer_docs || buffer.size == @buffer_docs - add_docs(buffer) - buffer.clear - end - end - add_docs(buffer) if !buffer.empty? - - @solr.commit unless @debug - end - - def add_docs(documents) - @solr.add(documents) unless @debug - puts documents.inspect if @debug - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/add_document.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/add_document.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/add_document.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/add_document.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,63 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
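A hedged sketch of what the AddDocument request below serializes to (document values invented; field order can vary):

    require 'solr'

    request = Solr::Request::AddDocument.new(:id => 1, :title_t => 'Lucene in Action')
    puts request.handler  # => update
    puts request.to_s     # => <add><doc><field name='id'>1</field><field name='title_t'>Lucene in Action</field></doc></add>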
- -require 'solr/xml' -require 'solr/request/base' -require 'solr/document' -require 'solr/request/update' - -class Solr::Request::AddDocument < Solr::Request::Update - - # create the request, optionally passing in a Solr::Document - # - # request = Solr::Request::AddDocument.new doc - # - # as a short cut you can pass in a Hash instead: - # - # request = Solr::Request::AddDocument.new :creator => 'Jorge Luis Borges' - # - # or an array, to add multiple documents at the same time: - # - # request = Solr::Request::AddDocument.new([doc1, doc2, doc3]) - - def initialize(doc={}) - @docs = [] - if doc.is_a?(Array) - doc.each { |d| add_doc(d) } - else - add_doc(doc) - end - end - - # returns the request as a string suitable for posting - - def to_s - e = Solr::XML::Element.new 'add' - for doc in @docs - e.add_element doc.to_xml - end - return e.to_s - end - - private - def add_doc(doc) - case doc - when Hash - @docs << Solr::Document.new(doc) - when Solr::Document - @docs << doc - else - raise "must pass in Solr::Document or Hash" - end - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/base.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/base.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/base.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/base.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Request::Base - - - #TODO : Add base support for the debugQuery flag, and such that the response provides debug output easily - - # returns either :xml or :ruby depending on what the - # response type is for a given request - - def response_format - raise "unknown request type: #{self.class}" - end - - def content_type - 'text/xml; charset=utf-8' - end - - # returns the solr handler or url fragment that can - # respond to this type of request - - def handler - raise "unknown request type: #{self.class}" - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/commit.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/commit.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/commit.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/commit.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
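A hedged sketch of issuing the commit request below through a connection (the server URL is an assumption; Connection#send is the same call SolrSource uses above):

    require 'solr'

    conn = Solr::Connection.new('http://localhost:8983/solr')
    conn.send(Solr::Request::Commit.new(:wait_searcher => false))
    # posts: <commit waitSearcher='false' waitFlush='true'/>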
- -require 'solr/xml' - -class Solr::Request::Commit < Solr::Request::Update - - def initialize(options={}) - # Hash#fetch keeps an explicit false; "|| true" would silently override it - @wait_searcher = options.fetch(:wait_searcher, true) - @wait_flush = options.fetch(:wait_flush, true) - end - - - def to_s - e = Solr::XML::Element.new('commit') - e.attributes['waitSearcher'] = @wait_searcher ? 'true' : 'false' - e.attributes['waitFlush'] = @wait_flush ? 'true' : 'false' - - e.to_s - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/delete.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/delete.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/delete.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/delete.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr/xml' - -class Solr::Request::Delete < Solr::Request::Update - - # A delete request can be for a specific document id - # - # request = Solr::Request::Delete.new(:id => 1234) - # - # or by query: - # - # request = Solr::Request::Delete.new(:query => 'some_field:some_value') - # - def initialize(options) - unless options.kind_of?(Hash) and (options[:id] or options[:query]) - raise Solr::Exception.new("must pass in :id or :query") - end - if options[:id] and options[:query] - raise Solr::Exception.new("can't pass in both :id and :query") - end - @document_id = options[:id] - @query = options[:query] - end - - def to_s - delete_element = Solr::XML::Element.new('delete') - if @document_id - id_element = Solr::XML::Element.new('id') - id_element.text = @document_id - delete_element.add_element(id_element) - elsif @query - query = Solr::XML::Element.new('query') - query.text = @query - delete_element.add_element(query) - end - delete_element.to_s - end -end - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/dismax.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/dismax.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/dismax.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/dismax.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
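A hedged sketch of the dismax request below (field names and boosts invented; nil parameters are dropped from the request string, and parameter order is unspecified):

    require 'solr'

    request = Solr::Request::Dismax.new(:query        => 'ipod',
                                        :query_fields => 'name^2 description',
                                        :phrase_slop  => 1,
                                        :sort         => [{:price => :descending}])
    puts request.to_s  # => roughly q=ipod&qt=dismax&qf=name%5E2%20description&ps=1&sort=price%20desc&wt=ruby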
- -class Solr::Request::Dismax < Solr::Request::Standard - - VALID_PARAMS.replace(VALID_PARAMS + [:tie_breaker, :query_fields, :minimum_match, :phrase_fields, :phrase_slop, - :boost_query, :boost_functions]) - - def initialize(params) - @alternate_query = params.delete(:alternate_query) - @sort_values = params.delete(:sort) - - super - - @query_type = "dismax" - end - - def to_hash - hash = super - hash[:tie] = @params[:tie_breaker] - hash[:mm] = @params[:minimum_match] - hash[:qf] = @params[:query_fields] - hash[:pf] = @params[:phrase_fields] - hash[:ps] = @params[:phrase_slop] - hash[:bq] = @params[:boost_query] - hash[:bf] = @params[:boost_functions] - hash["q.alt"] = @alternate_query - # FIXME: 2007-02-13 -- This code is duplicated in - # Solr::Request::Standard. It should be refactored into a single location. - hash[:sort] = @sort_values.collect do |sort| - key = sort.keys[0] - "#{key.to_s} #{sort[key] == :descending ? 'desc' : 'asc'}" - end.join(',') if @sort_values - return hash - end - -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/index_info.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/index_info.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/index_info.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/index_info.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Request::IndexInfo < Solr::Request::Select - - def handler - 'admin/luke' - end - - def to_hash - {:numTerms => 0}.merge(super.to_hash) - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/modify_document.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/modify_document.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/modify_document.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/modify_document.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
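A hedged sketch of the modify-document request below (the id and field are invented):

    require 'solr'

    request = Solr::Request::ModifyDocument.new(:id => 10, :overwrite => {:name_t => 'new name'})
    puts request.handler  # => update?mode=name_t:OVERWRITE
    puts request.to_s     # an <add><doc>...</doc></add> carrying the id and the new value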
- -require 'solr/xml' -require 'solr/request/base' -require 'solr/document' -require 'solr/request/update' - -class Solr::Request::ModifyDocument < Solr::Request::Update - - # Example: ModifyDocument.new(:id => 10, :overwrite => {:field_name => "new value"}) - def initialize(update_data) - modes = [] - @doc = {} - [:overwrite, :append, :distinct, :increment, :delete].each do |mode| - field_data = update_data[mode] - if field_data - field_data.each do |field_name, field_value| - modes << "#{field_name}:#{mode.to_s.upcase}" - @doc[field_name] = field_value if field_value # if value is nil, omit so it can be removed - end - update_data.delete mode - end - end - @mode = modes.join(",") - - # only one key should be left over, the id - @doc[update_data.keys[0].to_s] = update_data.values[0] - end - - # returns the request as a string suitable for posting - def to_s - e = Solr::XML::Element.new 'add' - e.add_element(Solr::Document.new(@doc).to_xml) - return e.to_s - end - - def handler - "update?mode=#{@mode}" - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/optimize.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/optimize.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/optimize.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/optimize.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr/xml' - -class Solr::Request::Optimize < Solr::Request::Update - - def to_s - Solr::XML::Element.new('optimize').to_s - end - -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/ping.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/ping.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/ping.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/ping.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# TODO: Consider something lazy like this? 
-# Solr::Request::Ping = Solr::Request.simple_request :format=>:xml, :handler=>'admin/ping' -# class Solr::Request -# def self.simple_request(options) -# Class.new do -# def response_format -# options[:format] -# end -# def handler -# options[:handler] -# end -# end -# end -# end - -class Solr::Request::Ping < Solr::Request::Base - def response_format - :xml - end - - def handler - 'admin/ping' - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/select.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/select.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/select.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/select.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,56 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'erb' - -# "Abstract" base class, only useful with subclasses that add parameters -class Solr::Request::Select < Solr::Request::Base - - attr_reader :query_type - - def initialize(qt=nil, params={}) - @query_type = qt - @select_params = params - end - - def response_format - :ruby - end - - def handler - 'select' - end - - def content_type - 'application/x-www-form-urlencoded; charset=utf-8' - end - - def to_hash - return {:qt => query_type, :wt => 'ruby'}.merge(@select_params) - end - - def to_s - raw_params = self.to_hash - - http_params = [] - raw_params.each do |key,value| - if value.respond_to?(:each) && !value.is_a?(String) - value.each { |v| http_params << "#{key}=#{ERB::Util::url_encode(v)}" unless v.nil?} - else - http_params << "#{key}=#{ERB::Util::url_encode(value)}" unless value.nil? - end - end - - http_params.join("&") - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/spellcheck.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/spellcheck.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/spellcheck.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/spellcheck.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
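A hedged sketch of a spellcheck round trip (the misspelling is invented; assumes a server with the old spellchecker handler configured):

    require 'solr'

    conn = Solr::Connection.new('http://localhost:8983/solr')
    response = conn.send(Solr::Request::Spellcheck.new(:query => 'whatevr', :suggestion_count => 3))
    puts response.suggestions.inspect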
- -class Solr::Request::Spellcheck < Solr::Request::Select - - def initialize(params) - super('spellchecker') - @params = params - end - - def to_hash - hash = super - hash[:q] = @params[:query] - hash[:suggestionCount] = @params[:suggestion_count] - hash[:accuracy] = @params[:accuracy] - hash[:onlyMorePopular] = @params[:only_more_popular] - hash[:cmd] = @params[:command] - return hash - end - -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/standard.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/standard.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/standard.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/standard.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,374 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Request::Standard < Solr::Request::Select - - VALID_PARAMS = [:query, :sort, :default_field, :operator, :start, :rows, :shards, - :filter_queries, :field_list, :debug_query, :explain_other, :facets, :highlighting, :mlt] - - def initialize(params) - super('standard') - - raise "Invalid parameters: #{(params.keys - VALID_PARAMS).join(',')}" unless - (params.keys - VALID_PARAMS).empty? - - raise ":query parameter required" unless params[:query] - - @params = params.dup - - # Validate operator - if params[:operator] - raise "Only :and/:or operators allowed" unless - [:and, :or].include?(params[:operator]) - - @params[:operator] = params[:operator].to_s.upcase - end - - # Validate start, rows can be transformed to ints - @params[:start] = params[:start].to_i if params[:start] - @params[:rows] = params[:rows].to_i if params[:rows] - - @params[:field_list] ||= ["*","score"] - - @params[:shards] ||= [] - end - - def to_hash - hash = {} - - # standard request param processing - hash[:sort] = @params[:sort].collect do |sort| - key = sort.keys[0] - "#{key.to_s} #{sort[key] == :descending ? 'desc' : 'asc'}" - end.join(',') if @params[:sort] - hash[:q] = @params[:query] - hash["q.op"] = @params[:operator] - hash[:df] = @params[:default_field] - - # common parameter processing - hash[:start] = @params[:start] - hash[:rows] = @params[:rows] - hash[:fq] = @params[:filter_queries] - hash[:fl] = @params[:field_list].join(',') - hash[:debugQuery] = @params[:debug_query] - hash[:explainOther] = @params[:explain_other] - hash[:shards] = @params[:shards].join(',') unless @params[:shards].empty? 
- - # facet parameter processing - if @params[:facets] - # TODO need validation of all that is under the :facets Hash too - hash[:facet] = true - hash["facet.field"] = [] - hash["facet.query"] = @params[:facets][:queries] - hash["facet.sort"] = (@params[:facets][:sort] == :count) if @params[:facets][:sort] - hash["facet.limit"] = @params[:facets][:limit] - hash["facet.missing"] = @params[:facets][:missing] - hash["facet.mincount"] = @params[:facets][:mincount] - hash["facet.prefix"] = @params[:facets][:prefix] - hash["facet.offset"] = @params[:facets][:offset] - hash["facet.method"] = @params[:facets][:method] if @params[:facets][:method] - if @params[:facets][:fields] # facet fields are optional (could be facet.query only) - @params[:facets][:fields].each do |f| - if f.kind_of? Hash - key = f.keys[0] - value = f[key] - hash["facet.field"] << key - hash["f.#{key}.facet.sort"] = (value[:sort] == :count) if value[:sort] - hash["f.#{key}.facet.limit"] = value[:limit] - hash["f.#{key}.facet.missing"] = value[:missing] - hash["f.#{key}.facet.mincount"] = value[:mincount] - hash["f.#{key}.facet.prefix"] = value[:prefix] - hash["f.#{key}.facet.offset"] = value[:offset] - else - hash["facet.field"] << f - end - end - end - end - - # highlighting parameter processing - http://wiki.apache.org/solr/HighlightingParameters - if @params[:highlighting] - hash[:hl] = true - hash["hl.fl"] = @params[:highlighting][:field_list].join(',') if @params[:highlighting][:field_list] - - snippets = @params[:highlighting][:max_snippets] - if snippets - if snippets.kind_of? Hash - if snippets[:default] - hash["hl.snippets"] = snippets[:default] - end - if snippets[:fields] - snippets[:fields].each do |k,v| - hash["f.#{k}.hl.snippets"] = v - end - end - else - hash["hl.snippets"] = snippets - end - end - - fragsize = @params[:highlighting][:fragment_size] - if fragsize - if fragsize.kind_of? Hash - if fragsize[:default] - hash["hl.fragsize"] = fragsize[:default] - end - if fragsize[:fields] - fragsize[:fields].each do |k,v| - hash["f.#{k}.hl.fragsize"] = v - end - end - else - hash["hl.fragsize"] = fragsize - end - end - - rfm = @params[:highlighting][:require_field_match] - if nil != rfm - if rfm.kind_of? Hash - if nil != rfm[:default] - hash["hl.requireFieldMatch"] = rfm[:default] - end - if rfm[:fields] - rfm[:fields].each do |k,v| - hash["f.#{k}.hl.requireFieldMatch"] = v - end - end - else - hash["hl.requireFieldMatch"] = rfm - end - end - - mac = @params[:highlighting][:max_analyzed_chars] - if mac - if mac.kind_of? Hash - if mac[:default] - hash["hl.maxAnalyzedChars"] = mac[:default] - end - if mac[:fields] - mac[:fields].each do |k,v| - hash["f.#{k}.hl.maxAnalyzedChars"] = v - end - end - else - hash["hl.maxAnalyzedChars"] = mac - end - end - - prefix = @params[:highlighting][:prefix] - if prefix - if prefix.kind_of? Hash - if prefix[:default] - hash["hl.simple.pre"] = prefix[:default] - end - if prefix[:fields] - prefix[:fields].each do |k,v| - hash["f.#{k}.hl.simple.pre"] = v - end - end - else - hash["hl.simple.pre"] = prefix - end - end - - suffix = @params[:highlighting][:suffix] - if suffix - if suffix.kind_of? Hash - if suffix[:default] - hash["hl.simple.post"] = suffix[:default] - end - if suffix[:fields] - suffix[:fields].each do |k,v| - hash["f.#{k}.hl.simple.post"] = v - end - end - else - hash["hl.simple.post"] = suffix - end - end - - formatter = @params[:highlighting][:formatter] - if formatter - if formatter.kind_of? 
Hash - if formatter[:default] - hash["hl.formatter"] = formatter[:default] - end - if formatter[:fields] - formatter[:fields].each do |k,v| - hash["f.#{k}.hl.formatter"] = v - end - end - else - hash["hl.formatter"] = formatter - end - end - - fragmenter = @params[:highlighting][:fragmenter] - if fragmenter - if fragmenter.kind_of? Hash - if fragmenter[:default] - hash["hl.fragmenter"] = fragmenter[:default] - end - if fragmenter[:fields] - fragmenter[:fields].each do |k,v| - hash["f.#{k}.hl.fragmenter"] = v - end - end - else - hash["hl.fragmenter"] = fragmenter - end - end - - merge_contiguous = @params[:highlighting][:merge_contiguous] - if nil != merge_contiguous - if merge_contiguous.kind_of? Hash - if nil != merge_contiguous[:default] - hash["hl.mergeContiguous"] = merge_contiguous[:default] - end - if merge_contiguous[:fields] - merge_contiguous[:fields].each do |k,v| - hash["f.#{k}.hl.mergeContiguous"] = v - end - end - else - hash["hl.mergeContiguous"] = merge_contiguous - end - end - - increment = @params[:highlighting][:increment] - if increment - if increment.kind_of? Hash - if increment[:default] - hash["hl.increment"] = increment[:default] - end - if increment[:fields] - increment[:fields].each do |k,v| - hash["f.#{k}.hl.increment"] = v - end - end - else - hash["hl.increment"] = increment - end - end - - # support "old style" - alternate_fields = @params[:highlighting][:alternate_fields] - if alternate_fields - alternate_fields.each do |f,v| - hash["f.#{f}.hl.alternateField"] = v - end - end - - alternate_field = @params[:highlighting][:alternate_field] - if alternate_field - if alternate_field.kind_of? Hash - if alternate_field[:default] - hash["hl.alternateField"] = alternate_field[:default] - end - if alternate_field[:fields] - alternate_field[:fields].each do |k,v| - hash["f.#{k}.hl.alternateField"] = v - end - end - else - hash["hl.alternateField"] = alternate_field - end - end - - mafl = @params[:highlighting][:max_alternate_field_length] - if mafl - if mafl.kind_of? Hash - if mafl[:default] - hash["hl.maxAlternateFieldLength"] = mafl[:default] - end - if mafl[:fields] - mafl[:fields].each do |k,v| - hash["f.#{k}.hl.maxAlternateFieldLength"] = v - end - else - # support "old style" - mafl.each do |k,v| - hash["f.#{k}.hl.maxAlternateFieldLength"] = v - end - end - else - hash["hl.maxAlternateFieldLength"] = mafl - end - end - - hash["hl.usePhraseHighlighter"] = @params[:highlighting][:use_phrase_highlighter] - - regex = @params[:highlighting][:regex] - if regex - if regex[:slop] - if regex[:slop].kind_of? Hash - if regex[:slop][:default] - hash["hl.regex.slop"] = regex[:slop][:default] - end - if regex[:slop][:fields] - regex[:slop][:fields].each do |k,v| - hash["f.#{k}.hl.regex.slop"] = v - end - end - else - hash["hl.regex.slop"] = regex[:slop] - end - end - if regex[:pattern] - if regex[:pattern].kind_of? Hash - if regex[:pattern][:default] - hash["hl.regex.pattern"] = regex[:pattern][:default] - end - if regex[:pattern][:fields] - regex[:pattern][:fields].each do |k,v| - hash["f.#{k}.hl.regex.pattern"] = v - end - end - else - hash["hl.regex.pattern"] = regex[:pattern] - end - end - if regex[:max_analyzed_chars] - if regex[:max_analyzed_chars].kind_of? 
Hash - if regex[:max_analyzed_chars][:default] - hash["hl.regex.maxAnalyzedChars"] = regex[:max_analyzed_chars][:default] - end - if regex[:max_analyzed_chars][:fields] - regex[:max_analyzed_chars][:fields].each do |k,v| - hash["f.#{k}.hl.regex.maxAnalyzedChars"] = v - end - end - else - hash["hl.regex.maxAnalyzedChars"] = regex[:max_analyzed_chars] - end - end - end - - end - - if @params[:mlt] - hash[:mlt] = true - hash["mlt.count"] = @params[:mlt][:count] - hash["mlt.fl"] = @params[:mlt][:field_list].join(',') - hash["mlt.mintf"] = @params[:mlt][:min_term_freq] - hash["mlt.mindf"] = @params[:mlt][:min_doc_freq] - hash["mlt.minwl"] = @params[:mlt][:min_word_length] - hash["mlt.maxwl"] = @params[:mlt][:max_word_length] - hash["mlt.maxqt"] = @params[:mlt][:max_query_terms] - hash["mlt.maxntp"] = @params[:mlt][:max_tokens_parsed] - hash["mlt.boost"] = @params[:mlt][:boost] - end - - hash.merge(super.to_hash) - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/update.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request/update.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request/update.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request/update.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# a parent class for all requests that go through the solr update handler -# TODO: Use new xml update handler for better error responses -class Solr::Request::Update < Solr::Request::Base - def response_format - :xml - end - - def handler - 'update' - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request.rb solr-3.3/client/ruby/solr-ruby/lib/solr/request.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/request.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/request.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
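Tying the parameter groups above together, a hedged sketch of a standard query with facets and highlighting (field names invented):

    require 'solr'

    request = Solr::Request::Standard.new(
      :query          => 'ipod',
      :rows           => 10,
      :filter_queries => ['inStock:true'],
      :facets         => {:fields => [:category, {:price => {:limit => 5}}], :mincount => 1},
      :highlighting   => {:field_list => ['name'], :max_snippets => 3})
    puts request.to_s  # flattens to q=ipod&fq=inStock%3Atrue&facet=true&facet.field=category&f.price.facet.limit=5&hl=true...&wt=ruby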
- -module Solr; module Request; end; end -require 'solr/request/add_document' -require 'solr/request/modify_document' -require 'solr/request/base' -require 'solr/request/commit' -require 'solr/request/delete' -require 'solr/request/ping' -require 'solr/request/select' -require 'solr/request/standard' -require 'solr/request/spellcheck' -require 'solr/request/dismax' -require 'solr/request/update' -require 'solr/request/index_info' -require 'solr/request/optimize' diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/add_document.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/add_document.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/add_document.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/add_document.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::AddDocument < Solr::Response::Xml - def initialize(xml) - super - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/base.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/base.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/base.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/base.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::Base - attr_reader :raw_response - - def initialize(raw_response) - @raw_response = raw_response - end - - # factory method for creating a Solr::Response::* from - # a request and the raw response content - def self.make_response(request, raw) - - # make sure response format seems sane - unless [:xml, :ruby].include?(request.response_format) - raise Solr::Exception.new("unknown response format: #{request.response_format}" ) - end - - # TODO: Factor out this case... perhaps the request object should provide the response class instead? Or dynamically align by class name? - # Maybe the request itself could have the response handling features that get mixed in with a single general purpose response object? 
- - begin - klass = eval(request.class.name.sub(/Request/,'Response')) - rescue NameError - raise Solr::Exception.new("unknown request type: #{request.class}") - else - klass.new(raw) - end - - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/commit.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/commit.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/commit.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/commit.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'rexml/xpath' - -class Solr::Response::Commit < Solr::Response::Xml -end - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/delete.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/delete.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/delete.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/delete.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::Delete < Solr::Response::Xml; end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/dismax.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/dismax.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/dismax.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/dismax.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::Dismax < Solr::Response::Standard - # no need for special processing - - # FIXME: 2007-02-07 -- The existence of this class indicates that - # the Request/Response pair architecture is a little hinky. Perhaps we could refactor - # out some of the most common functionality -- Common Query Parameters, Highlighting Parameters, - # Simple Facet Parameters, etc. -- into modules? 
-end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/index_info.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/index_info.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/index_info.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/index_info.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::IndexInfo < Solr::Response::Ruby - def initialize(ruby_code) - super - end - - def num_docs - return @data['index']['numDocs'] - end - - def field_names - return @data['fields'].keys - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/modify_document.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/modify_document.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/modify_document.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/modify_document.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::ModifyDocument < Solr::Response::Xml - def initialize(xml) - super - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/optimize.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/optimize.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/optimize.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/optimize.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
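For context, responses are normally constructed via Solr::Response::Base.make_response above, which derives the response class from the request class name; a hedged sketch with a hand-built XML body:

    require 'solr'

    request  = Solr::Request::Ping.new
    raw      = "<response><lst name='responseHeader'><int name='status'>0</int></lst></response>"
    response = Solr::Response::Base.make_response(request, raw)
    puts response.class  # => Solr::Response::Ping
    puts response.ok?    # false here: the body lacks the ping element the check looks for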
- -class Solr::Response::Optimize < Solr::Response::Commit -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/ping.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/ping.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/ping.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/ping.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'rexml/xpath' - -class Solr::Response::Ping < Solr::Response::Xml - - def initialize(xml) - super - @ok = REXML::XPath.first(@doc, './solr/ping') ? true : false - end - - # returns true or false depending on whether the ping - # was successful or not - def ok? - @ok - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/ruby.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/ruby.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/ruby.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/ruby.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::Ruby < Solr::Response::Base - attr_reader :data, :header - - def initialize(ruby_code) - super - begin - #TODO: what about pulling up data/header/response to ResponseBase, - # or maybe a new middle class like SelectResponseBase since - # all Select queries return this same sort of stuff?? - # XML (&wt=xml) and Ruby (&wt=ruby) responses contain exactly the same structure. - # a goal of solrb is to make it irrelevant which gets used under the hood, - # but favor Ruby responses. - @data = eval(ruby_code) - @header = @data['responseHeader'] - raise "response should be a hash" unless @data.kind_of? Hash - raise "response header missing" unless @header.kind_of? Hash - rescue SyntaxError => e - raise Solr::Exception.new("invalid ruby code: #{e}") - end - end - - def ok? 
- @header['status'] == 0 - end - - def query_time - @header['QTime'] - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/select.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/select.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/select.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/select.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::Select < Solr::Response::Ruby - def initialize(ruby_code) - super - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/spellcheck.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/spellcheck.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/spellcheck.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/spellcheck.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::Spellcheck < Solr::Response::Ruby - attr_reader :suggestions - - def initialize(ruby_code) - super - @suggestions = @data['suggestions'] - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/standard.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/standard.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/standard.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/standard.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -class Solr::Response::Standard < Solr::Response::Ruby - FacetValue = Struct.new(:name, :value) - include Enumerable - - def initialize(ruby_code) - super - @response = @data['response'] - raise "response section missing" unless @response.kind_of? 
Hash - end - - def total_hits - @response['numFound'] - end - - def start - @response['start'] - end - - def hits - @response['docs'] - end - - def max_score - @response['maxScore'] - end - - # TODO: consider the use of json.nl parameter - def field_facets(field) - facets = [] - values = @data['facet_counts']['facet_fields'][field] - Solr::Util.paired_array_each(values) do |key, value| - facets << FacetValue.new(key, value) - end - - facets - end - - def highlighted(id, field) - @data['highlighting'][id.to_s][field.to_s] rescue nil - end - - # supports enumeration of hits - # TODO revisit - should this iterate through *all* hits by re-requesting more? - def each - @response['docs'].each {|hit| yield hit} - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/xml.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response/xml.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response/xml.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response/xml.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'rexml/document' -require 'solr/exception' - -class Solr::Response::Xml < Solr::Response::Base - attr_reader :doc, :status_code, :status_message - - def initialize(xml) - super - # parse the xml - @doc = REXML::Document.new(xml) - - # look for the result code and string, e.g.: - # <response> - #   <lst name="responseHeader"><int name="status">0</int><int name="QTime">2</int></lst> - # </response> - # - result = REXML::XPath.first(@doc, './response/lst[@name="responseHeader"]/int[@name="status"]') - if result - @status_code = result.text - @status_message = result.text # TODO: any need for a message? - end - rescue REXML::ParseException => e - raise Solr::Exception.new("invalid response xml: #{e}") - end - - def ok? - return @status_code == '0' - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response.rb solr-3.3/client/ruby/solr-ruby/lib/solr/response.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/response.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/response.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
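A hedged sketch of consuming a standard response end to end (the server URL and field names are assumptions):

    require 'solr'

    conn = Solr::Connection.new('http://localhost:8983/solr')
    response = conn.send(Solr::Request::Standard.new(:query  => 'ipod',
                                                     :facets => {:fields => [:category]}))
    puts "#{response.total_hits} hits, max score #{response.max_score}"
    response.each { |hit| puts hit['id'] }
    response.field_facets('category').each { |f| puts "#{f.name}: #{f.value}" }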
- -module Solr; module Response; end; end -require 'solr/response/base' -require 'solr/response/xml' -require 'solr/response/ruby' -require 'solr/response/ping' -require 'solr/response/add_document' -require 'solr/response/modify_document' -require 'solr/response/standard' -require 'solr/response/spellcheck' -require 'solr/response/dismax' -require 'solr/response/commit' -require 'solr/response/delete' -require 'solr/response/index_info' -require 'solr/response/optimize' -require 'solr/response/select' \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/solrtasks.rb solr-3.3/client/ruby/solr-ruby/lib/solr/solrtasks.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/solrtasks.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/solrtasks.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -#!/usr/bin/env ruby -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# TODO: fill out Solr tasks: start, stop, ping, optimize, etc. - -require 'rake' -require 'rake/tasklib' - -module Solr - namespace :solr do - desc "Start Solr" - task :start do - # TODO: actually start it up! - puts "Starting..." - end - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/util.rb solr-3.3/client/ruby/solr-ruby/lib/solr/util.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/util.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/util.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
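The solr:start task in solrtasks.rb above is stubbed out (its TODO was never filled in). A hedged sketch of how it might shell out to the bundled Jetty, written as Rakefile code; the jetty_home/port defaults are illustrative, and the java invocation mirrors TestSolrServer#jetty_command later in this patch:

    namespace :solr do
      desc "Start Solr"
      task :start do
        jetty_home = ENV['SOLR_JETTY_HOME'] || 'example'  # directory containing start.jar
        jetty_port = ENV['SOLR_JETTY_PORT'] || 8983
        Dir.chdir(jetty_home) do
          # blocks until Jetty exits; backgrounding/pidfiles left as the TODO suggests
          sh "java -Djetty.port=#{jetty_port} -jar start.jar"
        end
      end
    end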
- -class Solr::Util - # paired_array_each([key1,value1,key2,value2]) yields twice: - # |key1,value1| and |key2,value2| - def self.paired_array_each(a, &block) - 0.upto(a.size / 2 - 1) do |i| - n = i * 2 - yield(a[n], a[n+1]) - end - end - - # paired_array_to_hash([key1,value1,key2,value2]) => {key1 => value1, key2 => value2} - def self.paired_array_to_hash(a) - Hash[*a] - end - - def self.query_parser_escape(string) - # backslash prefix everything that isn't a word character - string.gsub(/(\W)/,'\\\\\1') - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/xml.rb solr-3.3/client/ruby/solr-ruby/lib/solr/xml.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr/xml.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr/xml.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,47 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -module Solr::XML -end - -begin - - # If we can load rubygems and libxml-ruby... - require 'rubygems' - require 'xml/libxml' - - # then make a few modifications to XML::Node so it can stand in for REXML::Element - class XML::Node - # element.add_element(another_element) should work - alias_method :add_element, :<< - - # element.attributes['blah'] should work - def attributes - self - end - - # element.text = "blah" should work - def text=(x) - self << x.to_s - end - end - - # And use XML::Node for our XML generation - Solr::XML::Element = XML::Node - -rescue LoadError => e # If we can't load either rubygems or libxml-ruby - - # Just use REXML. - require 'rexml/document' - Solr::XML::Element = REXML::Element - -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr.rb solr-3.3/client/ruby/solr-ruby/lib/solr.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/lib/solr.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/lib/solr.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
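A quick usage sketch of the Solr::Util helpers defined above (return values shown in inspect form):

    require 'solr'

    Solr::Util.paired_array_to_hash(['a', 1, 'b', 2])  # => {"a"=>1, "b"=>2}

    Solr::Util.paired_array_each(['a', 1, 'b', 2]) do |key, value|
      puts "#{key}=#{value}"  # prints a=1, then b=2
    end

    # every non-word character gets a backslash prefix
    Solr::Util.query_parser_escape('1+1:2')  # => "1\\+1\\:2"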
- -module Solr; end -require 'solr/exception' -require 'solr/request' -require 'solr/connection' -require 'solr/response' -require 'solr/util' -require 'solr/xml' -require 'solr/importer' -require 'solr/indexer' diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/LICENSE.txt solr-3.3/client/ruby/solr-ruby/LICENSE.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/LICENSE.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/LICENSE.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/Rakefile solr-3.3/client/ruby/solr-ruby/Rakefile --- solr-1.4.0+ds1/client/ruby/solr-ruby/Rakefile 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/Rakefile 1970-01-01 00:00:00.000000000 +0000 @@ -1,190 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# the default task is to run both the unit and functional tests -# functional tests require that a solr test server is running -# but this Rakefile should take care of starting and stopping it -# for you -# -# if you just want to run unit tests: -# -# rake test_units -# -# and if you just want to run functional tests -# -# rake test_functionals -# -# if you would like to see solr startup messages on STDERR -# when starting solr test server during functional tests use: -# -# rake SOLR_CONSOLE=true -SOLR_RUBY_VERSION = '0.0.8' - -require 'rubygems' -require 'rake' -require 'rake/testtask' -require 'rake/rdoctask' -require 'rake/packagetask' -require 'rake/gempackagetask' -require 'test/functional/test_solr_server' - -task :default => [:test_units] - -SOLR_PARAMS = { - :quiet => ENV['SOLR_CONSOLE'] ?
false : true, - :jetty_home => ENV['SOLR_JETTY_HOME'] || File.expand_path('../../../example'), - :jetty_port => ENV['SOLR_JETTY_PORT'] || 8888, - :solr_home => ENV['SOLR_HOME'] || File.expand_path('test') -} - - -spec = Gem::Specification.new do |s| - s.name = 'solr-ruby' - s.version = SOLR_RUBY_VERSION - s.author = 'Apache Solr' - s.email = 'ruby-dev@lucene.apache.org' - s.homepage = 'http://wiki.apache.org/solr/solr-ruby' - s.platform = Gem::Platform::RUBY - s.summary = 'Ruby library for working with Apache Solr' - - # Omit functional tests from gem for now, as that requires a Solr instance - s.files = Dir.glob("lib/**/*").concat(Dir.glob("test/unit/**/*")) - s.require_path = 'lib' - s.autorequire = 'solr' - s.has_rdoc = true -end - -namespace :gem do - Rake::GemPackageTask.new(spec) do |pkg| - pkg.need_zip = true - pkg.need_tar = true - pkg.package_dir = "pkg/gem" - end -end - -namespace :rails do - desc "Creates rails plugin structure and distributable packages. init.rb is created and removed on the fly." - task :package => "init.rb" do - rm_f("init.rb") - end - Rake::PackageTask.new("solr-ruby-rails", SOLR_RUBY_VERSION) do |pkg| - pkg.need_zip = true - pkg.need_tar = true - pkg.package_dir = "pkg/rails" - pkg.package_files.include("lib/**/*.rb", "test/unit/**/*.rb", "init.rb", "LICENSE.txt", "README") - end - - file "init.rb" do - open("init.rb", "w") do |file| - file.puts LICENSE - file.puts "require 'solr.rb'" - end - end - - desc "Install the Rails plugin version into the vendor/plugins dir. Need to set PLUGINS_DIR environment variable." - task :install_solr_ruby => :package do - plugins_dir = ENV["PLUGINS_DIR"] or raise "You must set PLUGINS_DIR" - mkdir File.join(plugins_dir, "solr-ruby-rails-#{SOLR_RUBY_VERSION}/") rescue nil - cp_r(File.join("pkg","rails", "solr-ruby-rails-#{SOLR_RUBY_VERSION}/"), plugins_dir) - end -end - -task :package => ["rails:package", "gem:package"] -task :repackage => [:clobber_package, :package] -task :clobber_package => ["rails:clobber_package", "gem:clobber_package"] do rm_r "pkg" rescue nil end -task :clobber => [:clobber_package] - -desc "Generate rdoc documentation" -Rake::RDocTask.new('doc') do |rd| - rd.rdoc_files.include("lib/**/*.rb") - rd.rdoc_files.include('README', 'CHANGES.yml', 'LICENSE.txt') - rd.main = 'README' - rd.rdoc_dir = 'doc' -end - -desc "Run unit tests" -Rake::TestTask.new(:test_units) do |t| - t.pattern = 'test/unit/*_test.rb' - t.verbose = true - t.ruby_opts = ['-r solr', '-r test/unit', '-Itest/unit'] -end - -# NOTE: test_functionals does not work standalone currently.
It needs the TestSolrServer wrapper in the :test task -Rake::TestTask.new(:test_functionals) do |t| - t.pattern = 'test/functional/*_test.rb' - t.verbose = true - t.ruby_opts = ['-r solr', '-r test/unit', '-Itest/functional'] -end - -desc "Run unit and functional tests" -task :test => [:test_units] do - rm_rf "test/data" # remove functional test temp data directory - - # wrap functional tests with a test-specific Solr server - got_error = TestSolrServer.wrap(SOLR_PARAMS) do - Rake::Task[:test_functionals].invoke - end - - raise "test failures" if got_error -end - -# TODO: consider replacing system() to rcov with the included -# Rake task: http://eigenclass.org/hiki.rb?cmd=view&p=rcov+FAQ&key=rake -namespace :test do - desc 'Measures test coverage' - # borrowed from here: http://clarkware.com/cgi/blosxom/2007/01/05#RcovRakeTask - task :coverage do - rm_rf "coverage" - rm_rf "coverage.data" - TestSolrServer.wrap(SOLR_PARAMS) do - system("rcov --aggregate coverage.data --text-summary -Ilib:test/functional test/functional/*_test.rb") - end - system("rcov --aggregate coverage.data --text-summary -Ilib:test/unit test/unit/*_test.rb") - system("open coverage/index.html") if PLATFORM['darwin'] - end -end - - -def egrep(pattern) - Dir['**/*.rb'].each do |fn| - count = 0 - open(fn) do |f| - while line = f.gets - count += 1 - if line =~ pattern - puts "#{fn}:#{count}:#{line}" - end - end - end - end -end - -desc "Report TODO/FIXME/TBD tags in the code" -task :todo do - egrep /#.*(FIXME|TODO|TBD)/ -end - -LICENSE = < :on) - - # add a document to the index - conn.add(:id => 123, :title_text => 'Lucene in Action') - - # update the document - conn.update(:id => 123, :title_text => 'Solr in Action') - - # print out the first hit in a query for 'action' - response = conn.query('action') - print response.hits[0] - - # iterate through all the hits for 'action' - conn.query('action') do |hit| - puts hit.inspect - end - - # delete document by id - conn.delete(123) - -INSTALLATION - -First run the tests: - - rake - -then build the gem: - - rake package - -and install the versioned gem: - - gem install pkg/solr-x.x.x.gem - -LICENSE - -This package is licensed using the Apache Software License 2.0. - - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/script/setup.rb solr-3.3/client/ruby/solr-ruby/script/setup.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/script/setup.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/script/setup.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
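The :test task above captures the pattern this Rakefile is built around: boot a throwaway Solr, run the functional suite against it, and always shut the server down. Reduced to its skeleton inside the Rakefile (the parameter values are the SOLR_PARAMS defaults from above; TestSolrServer itself appears later in this patch):

    require 'test/functional/test_solr_server'

    got_error = TestSolrServer.wrap(:jetty_home => File.expand_path('../../../example'),
                                    :jetty_port => 8888,
                                    :solr_home  => File.expand_path('test')) do
      # anything run here can talk to a live Solr on port 8888
      Rake::Task[:test_functionals].invoke
    end
    raise "test failures" if got_error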
- -include Solr diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/script/solrshell solr-3.3/client/ruby/solr-ruby/script/solrshell --- solr-1.4.0+ds1/client/ruby/solr-ruby/script/solrshell 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/script/solrshell 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -#!/usr/bin/env ruby -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -solr_lib = File.dirname(__FILE__) + '/../lib/solr' -setup = File.dirname(__FILE__) + '/setup' -irb_name = RUBY_PLATFORM =~ /mswin32/ ? 'irb.bat' : 'irb' - -exec "#{irb_name} -r #{solr_lib} -r #{setup} --simple-prompt" diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/admin-extra.html solr-3.3/client/ruby/solr-ruby/solr/conf/admin-extra.html --- solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/admin-extra.html 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/solr/conf/admin-extra.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ - - - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/protwords.txt solr-3.3/client/ruby/solr-ruby/solr/conf/protwords.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/protwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/solr/conf/protwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. 
-dontstems -zwhacky - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/schema.xml solr-3.3/client/ruby/solr-ruby/solr/conf/schema.xml --- solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/schema.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/solr/conf/schema.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,221 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - text - - - - - - - - - - - - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/scripts.conf solr-3.3/client/ruby/solr-ruby/solr/conf/scripts.conf --- solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/scripts.conf 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/solr/conf/scripts.conf 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -user= -solr_hostname=localhost -solr_port=8983 -rsyncd_port=18983 -data_dir= -webapp_name=solr -master_host= -master_data_dir= -master_status_dir= diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/solrconfig.xml solr-3.3/client/ruby/solr-ruby/solr/conf/solrconfig.xml --- solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/solrconfig.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/solr/conf/solrconfig.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,394 +0,0 @@ - - - - - - ${solr.abortOnConfigurationError:true} - - - ${solr.data.dir:./solr/data} - - - - false - 32 - 10 - 2147483647 - 10000 - 1000 - 10000 - - - - - false - 32 - 10 - 2147483647 - 10000 - - - false - - - - - - - - - - - - - - - - - - - 1024 - - - - - - - - - - - - - false - - - - - - - - 10 - - - - - - - - - - - - - - - - - - false - - - 4 - - - - - - - - - - - - - - - explicit - - - - - - - - - - explicit - text^1.0 title_text^2.0 description_text^1.5 id^10.0 - 2<-1 5<-2 6<90% - *:* - - - - - - - on - 1 - genre_facet - medium_facet - rating_facet - publisher_facet - - - - - - - - - 1 - 0.5 - - - - - - - - spell - - - - - word - - - - - - - - - - - - - - - - explicit - true - - - - - - - - 5 - - - - - solr - - - - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/stopwords.txt solr-3.3/client/ruby/solr-ruby/solr/conf/stopwords.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/stopwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/solr/conf/stopwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -#Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -s -such -t -that -the -their -then -there -these -they -this -to -was -will -with - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/synonyms.txt solr-3.3/client/ruby/solr-ruby/solr/conf/synonyms.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/synonyms.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/solr/conf/synonyms.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaa => aaaa -bbb => bbbb1 bbbb2 -ccc => cccc1,cccc2 -a\=>a => b\=>b -a\,a => b\,b -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/xslt/example.xsl solr-3.3/client/ruby/solr-ruby/solr/conf/xslt/example.xsl --- solr-1.4.0+ds1/client/ruby/solr-ruby/solr/conf/xslt/example.xsl 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/solr/conf/xslt/example.xsl 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ - - - - - - - - - - - - - - - <xsl:value-of select="$title"/> - - - -

-
- This has been formatted by the sample "example.xsl" transform - - use your own XSLT to get a nicer page
diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/admin-extra.html solr-3.3/client/ruby/solr-ruby/test/conf/admin-extra.html --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/admin-extra.html 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/conf/admin-extra.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ - - - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/protwords.txt solr-3.3/client/ruby/solr-ruby/test/conf/protwords.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/protwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/conf/protwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. -dontstems -zwhacky - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/schema.xml solr-3.3/client/ruby/solr-ruby/test/conf/schema.xml --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/schema.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/conf/schema.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,237 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - text - - - - - - - - - - - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/scripts.conf solr-3.3/client/ruby/solr-ruby/test/conf/scripts.conf --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/scripts.conf 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/conf/scripts.conf 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -user= -solr_hostname=localhost -solr_port=8983 -rsyncd_port=18983 -data_dir= -webapp_name=solr -master_host= -master_data_dir= -master_status_dir= diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/solrconfig.xml solr-3.3/client/ruby/solr-ruby/test/conf/solrconfig.xml --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/solrconfig.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/conf/solrconfig.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,376 +0,0 @@ - - - - - - ${solr.abortOnConfigurationError:true} - - - - - - - false - 32 - 10 - 2147483647 - 10000 - 1000 - 10000 - - - - - false - 32 - 10 - 2147483647 - 10000 - - - false - - - - - - - - - - - - - - - - - - - 1024 - - - - - - - - - - - - - false - - - - - - - - 10 - - - - - - - - - - - - - - - - - - false - - - 4 - - - - - - - - - - - - - - - explicit - - - - - - - - explicit - 0.01 - - text^0.5 - - - - - - - - - id,test - - - 2<-1 5<-2 6<90% - - 100 - *:* - - - - - - - - - - 1 - 0.5 - - - - - - - - spell - - - - - word - - - - - - - - - - - - - - - - explicit - true - - - - - - - - 5 - - - - - solr - - - - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/stopwords.txt solr-3.3/client/ruby/solr-ruby/test/conf/stopwords.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/stopwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/conf/stopwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -#Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -s -such -t -that -the -their -then -there -these -they -this -to -was -will -with - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/synonyms.txt solr-3.3/client/ruby/solr-ruby/test/conf/synonyms.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/conf/synonyms.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/conf/synonyms.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaa => aaaa -bbb => bbbb1 bbbb2 -ccc => cccc1,cccc2 -a\=>a => b\=>b -a\,a => b\,b -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/functional/server_test.rb solr-3.3/client/ruby/solr-ruby/test/functional/server_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/functional/server_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/functional/server_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,218 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' - -class BadRequest < Solr::Request::Standard - def response_format - :invalid - end -end - -class ServerTest < Test::Unit::TestCase - include Solr - - def setup - @connection = Connection.new("http://localhost:8888/solr", :autocommit => :on) - clean - end - - def test_full_lifecycle - # make sure autocommit is on - assert @connection.autocommit - - # make sure this doc isn't there to begin with - @connection.delete(123456) - - # add it - @connection.add(:id => 123456, :text => 'Borges') # add :some_date => 'NOW/HOUR' to test richer data type handling - # now = DateTime.now - - # look for it - response = @connection.query('Borges') - assert_equal 1, response.total_hits - hit = response.hits[0] - assert_equal '123456', hit['id'] - # assert_equal now.year, hit['whatever_date'].year - - # look for it via dismax - response = @connection.search('Borges') - assert_equal 1, response.total_hits - assert_equal '123456', response.hits[0]['id'] - - # delete it - @connection.delete(123456) - - # make sure it's gone - response = @connection.query('Borges') - assert_equal 0, response.total_hits - end - - def test_i18n_full_lifecycle - # make sure autocommit is on - assert @connection.autocommit - - # make sure this doc isn't there to begin with - @connection.delete(123456) - - # add it - @connection.add(:id => 123456, :text => 'Åäöêâîôû') - - # look for it - response = @connection.query('Åäöêâîôû') - assert_equal 1, response.total_hits - assert_equal '123456', response.hits[0]['id'] - - # delete it - @connection.delete(123456) - - # make sure it's gone - response = @connection.query('Åäöêâîôû Öëäïöü') - assert_equal 0, response.total_hits - end - - def test_sorting - @connection.add(:id => 1, :text => 'aaa woot') - @connection.add(:id => 2, :text => 'bbb woot') - @connection.add(:id => 3, :text => 'ccc woot') - @connection.commit - - results = 
@connection.query('woot', :sort => [:id => :descending], :rows => 2) - assert_equal([3, 2], results.hits.map { |h| h['id'].to_i }) - - results = @connection.search('woot', :sort => [:id => :descending], :rows => 2) - assert_equal([3, 2], results.hits.map { |h| h['id'].to_i }) - - @connection.delete_by_query("id:1 OR id:2 OR id:3") - end - - def test_bad_connection - conn = Solr::Connection.new 'http://127.0.0.1:9999/invalid' - begin - conn.send(Solr::Request::Ping.new) - flunk "Expected exception not raised" - rescue ::Exception - # expected - assert true - end - end - - def test_bad_url - conn = Solr::Connection.new 'http://localhost:8888/invalid' - assert_raise(Net::HTTPServerException) do - conn.send(Solr::Request::Ping.new) - end - end - - def test_commit - response = @connection.send(Solr::Request::Commit.new) - assert response.ok? - end - - def test_optimize - response = @connection.send(Solr::Request::Optimize.new) - assert response.ok? - end - -# TODO: add test_ping back... something seems to have changed with the response, so adjustments are needed. -# non-critical - if Solr is broken we'll know from other tests! -# def test_ping -# assert_equal true, @connection.ping -# end - - def test_delete_with_query - assert_equal true, @connection.delete_by_query('[* TO *]') - end - - def test_ping_with_bad_server - conn = Solr::Connection.new 'http://localhost:8888/invalid' - assert_equal false, conn.ping - end - - def test_invalid_response_format - request = BadRequest.new(:query => "solr") - assert_raise(Solr::Exception) do - @connection.send(request) - end - end - - def test_escaping - doc = Solr::Document.new :id => 47, :ruby_text => 'puts "ouch!"' - @connection.add(doc) - @connection.commit - - request = Solr::Request::Standard.new :query => 'ouch' - result = @connection.send(request) - - assert_match /puts/, result.raw_response - end - - def test_add_document - doc = {:id => 999, :text => 'hi there!'} - request = Solr::Request::AddDocument.new(doc) - response = @connection.send(request) - assert response.status_code == '0' - end - - def test_update - @connection.update(:id => 999, :text => 'update test') - end - - def test_no_such_field - doc = {:id => 999, :bogus => 'foo'} - request = Solr::Request::AddDocument.new(doc) - assert_raise(Net::HTTPServerException) do - response = @connection.send(request) - end - # assert_equal false, response.ok? 
- # assert_match "ERROR:unknown field 'bogus'", response.status_message - end - - def test_index_info - doc = {:id => 999, :test_index_facet => 'value'} - @connection.add(doc) - ii = Solr::Request::IndexInfo.new - info = @connection.send(Solr::Request::IndexInfo.new) - assert info.field_names.include?("id") && info.field_names.include?("test_index_facet") - assert_equal 1, info.num_docs - end - - def test_highlighting - @connection.add(:id => 1, :title_text => "Apache Solr") - - request = Solr::Request::Standard.new(:query => 'solr', - :highlighting => { - :field_list => ['title_text'], - :max_snippets => 3, - :prefix => ">>", - :suffix => "<<" - } - ) - - response = @connection.send(request) - assert_equal ["Apache >>Solr<<"], response.highlighted(1, :title_text) - end - - def test_entities - @connection.add(:id => 1, :title_text => " ") - response = @connection.query('nbsp') - assert_equal 1, response.total_hits - assert_equal '1', response.hits[0]['id'] - end - - # wipe the index clean - def clean - @connection.delete_by_query('*:*') - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/functional/test_solr_server.rb solr-3.3/client/ruby/solr-ruby/test/functional/test_solr_server.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/functional/test_solr_server.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/functional/test_solr_server.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,104 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A singleton class for starting/stopping a Solr server for testing purposes -# The behavior of TestSolrServer can be modified prior to start() by changing -# port, solr_home, and quiet properties. 
- -class TestSolrServer - require 'singleton' - include Singleton - attr_accessor :port, :jetty_home, :solr_home, :quiet - - # configure the singleton with some defaults - def initialize - @pid = nil - end - - def self.wrap(params = {}) - error = false - solr_server = self.instance - solr_server.quiet = params[:quiet] || true - solr_server.jetty_home = params[:jetty_home] - solr_server.solr_home = params[:solr_home] - solr_server.port = params[:jetty_port] || 8888 - begin - puts "starting solr server on #{RUBY_PLATFORM}" - solr_server.start - sleep params[:startup_wait] || 5 - yield - rescue - error = true - ensure - puts "stopping solr server" - solr_server.stop - end - - return error - end - - def jetty_command - "java -Djetty.port=#{@port} -Dsolr.solr.home=#{@solr_home} -jar start.jar" - end - - def start - puts "jetty_home: #{@jetty_home}" - puts "solr_home: #{@solr_home}" - puts "jetty_command: #{jetty_command}" - platform_specific_start - end - - def stop - platform_specific_stop - end - - if RUBY_PLATFORM =~ /mswin32/ - require 'win32/process' - - # start the solr server - def platform_specific_start - Dir.chdir(@jetty_home) do - @pid = Process.create( - :app_name => jetty_command, - :creation_flags => Process::DETACHED_PROCESS, - :process_inherit => false, - :thread_inherit => true, - :cwd => "#{@jetty_home}" - ).process_id - end - end - - # stop a running solr server - def platform_specific_stop - Process.kill(1, @pid) - Process.wait - end - else # Not Windows - # start the solr server - def platform_specific_start - puts self.inspect - Dir.chdir(@jetty_home) do - @pid = fork do - STDERR.close if @quiet - exec jetty_command - end - end - end - - # stop a running solr server - def platform_specific_stop - Process.kill('TERM', @pid) - Process.wait - end - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/add_document_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/add_document_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/add_document_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/add_document_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr_mock_base' - -class AddDocumentTest < SolrMockBaseTestCase - - def test_add_document_response - conn = Solr::Connection.new('http://localhost:9999/solr') - set_post_return('<response><lst name="responseHeader"><int name="status">0</int><int name="QTime">2</int></lst></response>') - doc = {:id => '123', :text => 'Tlon, Uqbar, Orbis Tertius'} - response = conn.send(Solr::Request::AddDocument.new(doc)) - assert_equal true, response.ok? - end - - def test_bad_add_document_response - conn = Solr::Connection.new('http://localhost:9999/solr') - set_post_return('<response><lst name="responseHeader"><int name="status">1</int><int name="QTime">2</int></lst></response>') - doc = {:id => '123', :text => 'Tlon, Uqbar, Orbis Tertius'} - response = conn.send(Solr::Request::AddDocument.new(doc)) - assert_equal false, response.ok? - end - - def test_shorthand - conn = Solr::Connection.new('http://localhost:9999/solr') - set_post_return('<response><lst name="responseHeader"><int name="status">0</int><int name="QTime">2</int></lst></response>') - doc = {:id => '123', :text => 'Tlon, Uqbar, Orbis Tertius'} - assert_equal true, conn.add(:id => '123', :text => 'Tlon, Uqbar, Orbis Tertius') - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/array_mapper_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/array_mapper_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/array_mapper_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/array_mapper_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr' -require 'test/unit' - -include Solr::Importer - -class ArrayMapperTest < Test::Unit::TestCase - def test_simple - mapping1 = {:one => "uno"} - mapping2 = {:two => "dos"} - - mapper = Solr::Importer::ArrayMapper.new([Mapper.new(mapping1),Mapper.new(mapping2)]) - mapped_data = mapper.map([{},{}]) - assert_equal "uno", mapped_data[:one] - assert_equal "dos", mapped_data[:two] - end - - def test_field_conflict_goes_to_last - mapping1 = {:same => "uno"} - mapping2 = {:same => "dos"} - - mapper = Solr::Importer::ArrayMapper.new([Mapper.new(mapping1),Mapper.new(mapping2)]) - mapped_data = mapper.map([{},{}]) - assert_equal "dos", mapped_data[:same] - end -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/changes_yaml_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/changes_yaml_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/changes_yaml_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/changes_yaml_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
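For reference, the <add> message Solr::Request::AddDocument serializes for the doc hash used in these tests has this shape (whitespace and field order may differ):

    require 'solr'

    doc = {:id => '123', :text => 'Tlon, Uqbar, Orbis Tertius'}
    puts Solr::Request::AddDocument.new(doc).to_s
    # => <add><doc><field name="id">123</field>
    #    <field name="text">Tlon, Uqbar, Orbis Tertius</field></doc></add>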
- -require 'test/unit' -require 'yaml' -require 'date' - -class ChangesYamlTest < Test::Unit::TestCase - def test_parse - change_log = YAML.load_file(File.expand_path(File.dirname(__FILE__)) + "/../../CHANGES.yml") - assert_equal Date.parse("2007-02-15"), change_log["v0.0.1"]["release_date"] - assert_equal ["initial release"], change_log["v0.0.1"]["changes"] - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/commit_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/commit_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/commit_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/commit_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,41 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr_mock_base' - -class CommitTest < SolrMockBaseTestCase - - def test_commit - xml = '<response><lst name="responseHeader"><int name="status">0</int><int name="QTime">2</int></lst></response>' - conn = Solr::Connection.new('http://localhost:9999/solr') - set_post_return(xml) - response = conn.send(Solr::Request::Commit.new) - assert_kind_of Solr::Response::Commit, response - assert_equal true, response.ok? - - # test shorthand - assert_equal true, conn.commit - end - - # def test_invalid_commit - # xml = '<response><lst name="responseHeader"><int name="status">1</int><int name="QTime">2</int></lst></response>' - # conn = Solr::Connection.new('http://localhost:9999/solr') - # set_post_return(xml) - # response = conn.send(Solr::Request::Commit.new) - # assert_kind_of Solr::Response::Commit, response - # assert_equal false, response.ok? - # - # # test shorthand - # assert_equal false, conn.commit - # end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/connection_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/connection_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/connection_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/connection_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
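The set_post_return helper used throughout these unit tests comes from solr_mock_base.rb, which is not part of this hunk; conceptually it stubs the connection's HTTP POST so a canned body comes back instead of a live Solr response. A self-contained sketch of the same idea (the stubbing below is illustrative, not the real helper):

    require 'solr'

    class Solr::Connection
      attr_accessor :canned_response
      def post(request)
        canned_response  # skip the network, hand back the canned body
      end
    end

    conn = Solr::Connection.new('http://localhost:9999/solr')
    conn.canned_response =
      "{'responseHeader' => {'status' => 0, 'QTime' => 2}," \
      " 'response' => {'numFound' => 0, 'start' => 0, 'docs' => []}}"
    response = conn.send(Solr::Request::Standard.new(:query => 'foo'))
    response.ok?  # => true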
- -require 'test/unit' -require 'solr' -require 'solr_mock_base' - -class ConnectionTest < SolrMockBaseTestCase - def test_mock - connection = Connection.new("http://localhost:9999") - set_post_return("foo") - assert_equal "foo", connection.post(Solr::Request::AddDocument.new) - end - - def test_bad_url - assert_raise(RuntimeError) do - Connection.new("ftp://localhost:9999") - end - end - - def test_connection_initialize - connection = Solr::Connection.new("http://localhost:8983/solr") - assert_equal 'localhost', connection.url.host - assert_equal 8983, connection.url.port - assert_equal '/solr', connection.url.path - end - - def test_non_standard_context - connection = Solr::Connection.new("http://localhost:8983/index") - assert_equal '/index', connection.url.path - end - - def test_xml_response - connection = Connection.new("http://localhost:9999") - set_post_return "" - response = connection.send(Solr::Request::Ping.new) - assert_equal "", response.raw_response - end - - def test_ruby_response - connection = Connection.new("http://localhost:9999") - set_post_return "{'responseHeader' => {}, 'response' => {}}" - response = connection.send(Solr::Request::Standard.new(:query => 'foo')) - assert_equal({'responseHeader' => {}, 'response' => {}}, response.data) - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/data_mapper_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/data_mapper_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/data_mapper_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/data_mapper_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,75 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
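The connection tests above fix the public surface of Solr::Connection: the constructor accepts only an http URL (anything else raises), the URL is parsed into host, port, and context path, and every request ultimately flows through #post. A short usage sketch, assuming a Solr server at the conventional http://localhost:8983/solr (hypothetical here):

    require 'solr'

    conn = Solr::Connection.new('http://localhost:8983/solr')

    # Shorthand helpers wrap Request objects; each returns a Response
    # whose #ok? reflects the status Solr reported.
    conn.add(:id => '1', :title => 'A Confederacy of Dunces')
    conn.commit
    response = conn.send(Solr::Request::Standard.new(:query => 'title:dunces'))
    puts response.ok?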
- -require 'solr' -require 'test/unit' - -class DataMapperTest < Test::Unit::TestCase - - def test_static_mapping - mapping = {:static => "value", - :static_array => ["value1", "value2"]} - - mapper = Solr::Importer::Mapper.new(mapping) - mapped_data = mapper.map({}) - - assert_equal "value", mapped_data[:static] - assert_equal ["value1", "value2"], mapped_data[:static_array] - end - - def test_simple_mapping - orig_data = {:orig_field => "value", - :multi1 => "val1", :multi2 => "val2"} - mapping = {:solr_field => :orig_field, - :mapped_array => [:multi1, :multi2], } - - mapper = Solr::Importer::Mapper.new(mapping) - mapped_data = mapper.map(orig_data) - - assert_equal "value", mapped_data[:solr_field] - assert_equal ["val1", "val2"], mapped_data[:mapped_array] - end - - def test_proc - orig_data = {:orig_field => "value"} - mapping = {:solr_field => Proc.new {|record| ">#{record[:orig_field]}<"}} - - mapper = Solr::Importer::Mapper.new(mapping) - mapped_data = mapper.map(orig_data) - - assert_equal ">value<", mapped_data[:solr_field] - end - - def test_overridden_field - mapping = {:solr_field => [:orig_field1, :orig_field2]} - orig_data = {:orig_field1 => "value1", :orig_field2 => "value2", } - - mapper = Solr::Importer::Mapper.new(mapping) - def mapper.field_data(orig_data, field_name) - ["~#{super(orig_data, field_name)}~"] # array tests that the value is flattened - end - mapped_data = mapper.map(orig_data) - - assert_equal ["~value1~", "~value2~"], mapped_data[:solr_field] - end - - def test_unknown_mapping - mapping = {:solr_field => /foo/} # regexp currently not a valid mapping type - - mapper = Solr::Importer::Mapper.new(mapping) - - assert_raise(RuntimeError) do - mapped_data = mapper.map({}) - end - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/delete_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/delete_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/delete_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/delete_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,56 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr_mock_base' - -class DeleteTest < SolrMockBaseTestCase - - def test_delete_request - request = Solr::Request::Delete.new(:id => '123') - assert_match(/<delete>[\s]*<id>123<\/id>[\s]*<\/delete>/m, request.to_s) - end - - def test_delete_by_query_request - request = Solr::Request::Delete.new(:query => 'name:summers') - assert_match(/<delete>[\s]*<query>name:summers<\/query>[\s]*<\/delete>/m, request.to_s) - end - - def test_delete_response - conn = Solr::Connection.new 'http://localhost:9999/solr' - set_post_return('<response><lst name="responseHeader"><int name="status">0</int><int name="QTime">2</int></lst></response>') - response = conn.send(Solr::Request::Delete.new(:id => 123)) - assert_equal true, response.ok?
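Taken together, the Mapper tests above document four mapping styles: a static value, a field reference (symbol), an array (one mapped value per element), and a Proc computed per record; anything else raises. A compact sketch of all four, assuming only the solr-ruby gem:

    require 'solr'

    orig = {:author => 'Borges', :year => '1944'}

    mapping = {
      :source  => 'LIBRARY',                                  # static value
      :creator => :author,                                    # copy one field
      :pair    => [:author, :year],                           # one value per element
      :decade  => Proc.new { |rec| rec[:year][0, 3] + '0s' }  # computed per record
    }

    mapped = Solr::Importer::Mapper.new(mapping).map(orig)
    puts mapped.inspect
    # => {:source=>"LIBRARY", :creator=>"Borges", :pair=>["Borges", "1944"], :decade=>"1940s"}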
- end - - def test_bad_delete_request - assert_raise(Solr::Exception) do - Solr::Request::Delete.new(:bogus => :param) - end - - assert_raise(Solr::Exception) do - Solr::Request::Delete.new(:id => 529, :query => "id:529") - end - end - - def test_bad_delete_response - conn = Solr::Connection.new 'http://localhost:9999/solr' - set_post_return('uhoh') - response = conn.send(Solr::Request::Delete.new(:id => 123)) - assert_equal false, response.ok? - end - - def test_delete_by_i18n_query_request - request = Solr::Request::Delete.new(:query => 'ëäïöü') - assert_match(/<delete>[\s]*<query>ëäïöü<\/query>[\s]*<\/delete>/m, request.to_s) - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/delimited_file_source_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/delimited_file_source_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/delimited_file_source_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/delimited_file_source_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr' -require 'test/unit' - -class DelimitedFileSourceTest < Test::Unit::TestCase - - def test_load - filename = File.expand_path(File.dirname(__FILE__)) + "/tab_delimited.txt" - - source = Solr::Importer::DelimitedFileSource.new(filename,/\t/) - assert_equal source.to_a.size, 1 - - source.each do |data| - assert_equal data[:asin], '0865681740' - end - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/dismax_request_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/dismax_request_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/dismax_request_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/dismax_request_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
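The delete tests above show the two mutually exclusive forms Solr::Request::Delete accepts; anything other than exactly one of :id or :query raises Solr::Exception. A minimal sketch:

    require 'solr'

    # Delete one document by unique key, or many by query - never both.
    by_id    = Solr::Request::Delete.new(:id => '529')
    by_query = Solr::Request::Delete.new(:query => 'category:discontinued')

    puts by_id.to_s      # => <delete><id>529</id></delete>
    puts by_query.to_s   # => <delete><query>category:discontinued</query></delete>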
- -require 'test/unit' -require 'solr' - -class DismaxRequestTest < Test::Unit::TestCase - - def test_basic_query - request = Solr::Request::Dismax.new(:query => 'query', :phrase_slop => '1000', :sort => [{:deedle => :descending}]) - assert_match(/q=query/, request.to_s) - assert_match(/qt=dismax/, request.to_s) - assert_match(/ps=1000/, request.to_s) - assert_match(/sort=deedle%20desc/, request.to_s) - end - -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/document_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/document_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/document_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/document_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,69 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' - -class DocumentTest < Test::Unit::TestCase - - def test_xml - doc = Solr::Document.new - doc << Solr::Field.new(:creator => 'Erik Hatcher') - assert_kind_of Solr::XML::Element, doc.to_xml - assert_match(/<doc>[\s]*<field name='creator'>Erik Hatcher<\/field>[\s]*<\/doc>/m, doc.to_xml.to_s) - end - - def test_repeatable - doc = Solr::Document.new - doc << Solr::Field.new(:creator => 'Erik Hatcher') - doc << Solr::Field.new(:creator => 'Otis Gospodnetic') - assert_kind_of Solr::XML::Element, doc.to_xml - assert_match(/<doc>[\s]*<field name='creator'>Erik Hatcher<\/field>[\s]*<field name='creator'>Otis Gospodnetic<\/field>[\s]*<\/doc>/m, doc.to_xml.to_s) - end - - def test_repeatable_in_hash - doc = Solr::Document.new({:creator => ['Erik Hatcher', 'Otis Gospodnetic']}) - assert_match(/<doc>[\s]*<field name='creator'>Erik Hatcher<\/field>[\s]*<field name='creator'>Otis Gospodnetic<\/field>[\s]*<\/doc>/m, doc.to_xml.to_s) - end - - def test_bad_doc - doc = Solr::Document.new - assert_raise(RuntimeError) do - doc << "invalid" - end - end - - def test_hash_shorthand - doc = Solr::Document.new :creator => 'Erik Hatcher', :title => 'Lucene in Action' - assert_equal 'Erik Hatcher', doc[:creator] - assert_equal 'Lucene in Action', doc[:title] - assert_equal nil, doc[:foo] - - doc = Solr::Document.new - doc << {:creator => 'Erik Hatcher', :title => 'Lucene in Action'} - doc[:subject] = 'Search' - assert_equal 'Erik Hatcher', doc[:creator] - assert_equal 'Lucene in Action', doc[:title] - assert_equal 'Search', doc[:subject] - end - - def test_boost - doc = Solr::Document.new :name => "McGrump" - doc.boost = 300.28 - assert_match(/<doc boost='300.28'>[\s]*<field name='name'>McGrump<\/field>[\s]*<\/doc>/, doc.to_xml.to_s) - end - - def test_string_values - doc = Solr::Document.new :name => "multi\nline" - assert_match(/<doc>[\s]*<field name='name'>multi\nline<\/field>[\s]*<\/doc>/, doc.to_xml.to_s) - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/field_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/field_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/field_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/field_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -# The ASF licenses this file to You under the Apache License,
Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' - -class FieldTest < Test::Unit::TestCase - - def test_xml - field = Solr::Field.new :creator => 'Erik Hatcher' - assert_kind_of Solr::XML::Element, field.to_xml - assert_match(/<field name='creator'>Erik Hatcher<\/field>/, field.to_xml.to_s) - end - - def test_escaped_xml - field = Solr::Field.new :creator => 'Erik Hatcher & His Amazing Leaping Ability' - assert_kind_of Solr::XML::Element, field.to_xml - assert_match(/<field name='creator'>Erik Hatcher &amp; His Amazing Leaping Ability<\/field>/, field.to_xml.to_s) - end - - def test_xml_date - field = Solr::Field.new :time => Time.now - assert_kind_of Solr::XML::Element, field.to_xml - assert_match(/<field name='time'>[\d]{4}-[\d]{2}-[\d]{2}T[\d]{2}:[\d]{2}:[\d]{2}Z<\/field>/, field.to_xml.to_s) - end - - def test_i18n_xml - field = Solr::Field.new :i18nstring => 'Äêâîôû Öëäïöü' - assert_kind_of Solr::XML::Element, field.to_xml - assert_match(/<field name='i18nstring'>Äêâîôû Öëäïöü<\/field>/m, field.to_xml.to_s) - end - - def test_boost_values - field = Solr::Field.new(:blah => "squee", :boost => 3.0) - assert_kind_of Solr::XML::Element, field.to_xml - assert_match(/<field name='blah' boost='3.0'>squee<\/field>/, field.to_xml.to_s) - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/hpricot_mapper_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/hpricot_mapper_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/hpricot_mapper_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/hpricot_mapper_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,44 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
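The Field and Document tests above pin down the XML serialization rules: fields serialize as field elements carrying a name attribute, XML-special characters are entity-escaped, Time values are rendered in Solr's ISO-8601 form, and boosts become attributes on the emitting element. A sketch of the builder API they exercise:

    require 'solr'

    doc = Solr::Document.new(:title => 'Ficciones')
    doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
    doc[:subject] = 'Fiction & Fantasy'   # the & is escaped on output
    doc.boost = 2.0                       # emitted as a boost attribute on the doc element

    puts doc.to_xml.to_s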
- -begin - require 'solr' - require 'test/unit' - require 'hpricot' - - class HpricotMapperTest < Test::Unit::TestCase - - def setup - @doc = open(File.expand_path(File.dirname(__FILE__)) + "/hpricot_test_file.xml"){|f| Hpricot.XML(f)} - end - - def test_simple_hpricot_path - mapping = {:field1 => :'child[@attribute="attribute1"]', - :field2 => :'child[@attribute="attribute2"]', - :field3 => :'child[@attribute="attribute3"]', - :field4 => :'child[@attribute="attribute3"] grandchild', - :field5 => :'child'} - - mapper = Solr::Importer::HpricotMapper.new(mapping) - mapped_data = mapper.map(@doc) - - assert_equal ['text1'], mapped_data[:field1] - assert_equal ['text2'], mapped_data[:field2] - assert_equal ['text3<grandchild>grandchild 3 text</grandchild>'], mapped_data[:field3] - assert_equal ['grandchild 3 text'], mapped_data[:field4] - assert_equal ['text1', 'text2', 'text3<grandchild>grandchild 3 text</grandchild>'], mapped_data[:field5] - end - - end -rescue LoadError => e - puts "HpricotMapperTest not run because #{e}" -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/hpricot_test_file.xml solr-3.3/client/ruby/solr-ruby/test/unit/hpricot_test_file.xml --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/hpricot_test_file.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/hpricot_test_file.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -<?xml version='1.0' encoding='UTF-8'?> -<!-- ASF license header comment (markup lost in extraction) --> -<root> - <child attribute="attribute1">text1</child> - <child attribute="attribute2">text2</child> - <child attribute="attribute3">text3<grandchild>grandchild 3 text</grandchild></child> -</root> \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/indexer_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/indexer_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/indexer_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/indexer_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,57 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
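HpricotMapper above resolves each mapping value as an Hpricot search expression against the source document and returns the matched elements' contents as an array. A guarded sketch mirroring the test's optional-dependency pattern (hpricot was never a hard requirement of solr-ruby):

    begin
      require 'solr'
      require 'hpricot'

      xml = Hpricot.XML('<root><child attribute="a1">text1</child></root>')
      mapper = Solr::Importer::HpricotMapper.new(:field1 => :'child[@attribute="a1"]')
      puts mapper.map(xml)[:field1].inspect   # => ["text1"]
    rescue LoadError => e
      puts "hpricot not available (#{e})"
    end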
- -require 'test/unit' -require 'solr' - -class Solr::Indexer - attr_reader :added - def add_docs(doc) - @added ||= [] - @added << doc - end -end - -class IndexerTest < Test::Unit::TestCase - def test_mapping_or_mapping - mapping = {:field => "foo"} - indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true) - indexer.index - assert_equal 3, indexer.added.size - - indexer = Solr::Indexer.new([1,2,3,4], Solr::Importer::Mapper.new(mapping), :debug => true) - indexer.index - assert_equal 4, indexer.added.size - end - - def test_batch - mapping = {:field => "foo"} - indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true, :buffer_docs => 2) - indexer.index - assert_equal 2, indexer.added.size - end - -end - - -# source = DataSource.new -# -# mapping = { -# :id => :isbn, -# :name => :author, -# :source => "BOOKS", -# :year => Proc.new {|record| record.date[0,4] }, -# } -# -# Solr::Indexer.index(source, mapper) do |orig_data, solr_document| -# solr_document[:timestamp] = Time.now -# end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/modify_document_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/modify_document_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/modify_document_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/modify_document_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' - -class ModifyDocumentTest < Test::Unit::TestCase - - def test_update_formatting - request = Solr::Request::ModifyDocument.new(:id => 10, :overwrite => {:name => ['val1', 'val2'], :copyfield => nil}) - assert_equal :xml, request.response_format - assert_match /copyfield\:OVERWRITE/, request.handler - assert_match /name\:OVERWRITE/, request.handler - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/ping_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/ping_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/ping_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/ping_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
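The indexer test above works because the monkey-patched add_docs merely records what would have been posted; the real Solr::Indexer maps each source record and adds the result to Solr, flushing per record unless :buffer_docs batches them. A sketch of the call shape, assuming (per the options exercised above) that :debug => true logs documents instead of posting them, so no live server is needed:

    require 'solr'

    records = [{:title => 'one'}, {:title => 'two'}, {:title => 'three'}]

    # :buffer_docs => 2 flushes after every two mapped documents.
    indexer = Solr::Indexer.new(records, {:doc_title => :title},
                                :debug => true, :buffer_docs => 2)
    indexer.index do |record, solr_document|
      solr_document[:indexed_at] = Time.now   # per-document hook, as in the
    end                                       # commented example above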
- -require 'solr_mock_base' - -class PingTest < SolrMockBaseTestCase - - def test_ping_response - xml = -<<PING_RESPONSE -<?xml-stylesheet type="text/xsl" href="ping.xsl"?> -<solr> - <ping> - </ping> -</solr> -PING_RESPONSE - conn = Solr::Connection.new('http://localhost:9999') - set_post_return(xml) - response = conn.send(Solr::Request::Ping.new) - assert_kind_of Solr::Response::Ping, response - assert_equal true, response.ok? - - # test shorthand - assert_equal true, conn.ping - end - - def test_bad_ping_response - xml = "<foo>bar</foo>" - conn = Solr::Connection.new('http://localhost:9999') - set_post_return(xml) - response = conn.send(Solr::Request::Ping.new) - assert_kind_of Solr::Response::Ping, response - assert_equal false, response.ok? - - # test shorthand - assert_equal false, conn.ping - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/request_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/request_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/request_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/request_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' - -class BadRequest < Solr::Request::Base -end - -class RequestTest < Test::Unit::TestCase - - def test_commit_request - request = Solr::Request::Commit.new - assert_equal :xml, request.response_format - assert_equal 'update', request.handler - assert_match(/<commit\/>/, request.to_s) - end - - def test_add_doc_request - request = Solr::Request::AddDocument.new(:title => "title") - assert_match(/<add>[\s]*<doc>[\s]*<field name='title'>title<\/field>[\s]*<\/doc>[\s]*<\/add>/m, request.to_s) - assert_equal :xml, request.response_format - assert_equal 'update', request.handler - - assert_raise(RuntimeError) do - Solr::Request::AddDocument.new("invalid") - end - end - - def test_add_multidoc_request - request = Solr::Request::AddDocument.new([{:title => "title1"}, {:title => "title2"}]) - assert_match(/<add>[\s]*<doc>[\s]*<field name='title'>title1<\/field>[\s]*<\/doc>[\s]*<doc>[\s]*<field name='title'>title2<\/field>[\s]*<\/doc>[\s]*<\/add>/m, request.to_s) - assert_equal :xml, request.response_format - assert_equal 'update', request.handler - end - - def test_ping_request - request = Solr::Request::Ping.new - assert_equal :xml, request.response_format - end - - def test_bad_request_class - assert_raise(RuntimeError) do - BadRequest.new.response_format - end - - assert_raise(RuntimeError) do - BadRequest.new.handler - end - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/response_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/response_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/response_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/response_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' -require 'solr_mock_base' - - -class ResponseTest < SolrMockBaseTestCase - - def test_response_xml_error - begin - Solr::Response::Xml.new("invalid xml&") - flunk("failed to get Solr::Exception as expected") - rescue Exception => exception - assert_kind_of Solr::Exception, exception - assert_match 'invalid response xml', exception.to_s - end - end - - def test_invalid_ruby - assert_raise(Solr::Exception) do - Solr::Response::Ruby.new(' {...') - end - end - - # This is now an acceptable use of Select, for the default request handler with no parameters (other than &wt=ruby) - # def test_bogus_request_handling - # assert_raise(Solr::Exception) do - # Solr::Response::Base.make_response(Solr::Request::Select.new, "response data") - # end - # end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/select_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/select_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/select_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/select_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' - -class SelectTest < Test::Unit::TestCase - - def test_basic_query - request = Solr::Request::Select.new('custom', :q => 'query') - assert_equal :ruby, request.response_format - assert_equal 'select', request.handler - assert_equal 'query', request.to_hash[:q] - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/solr_mock_base.rb solr-3.3/client/ruby/solr-ruby/test/unit/solr_mock_base.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/solr_mock_base.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/solr_mock_base.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
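The response tests above establish the error contract: feeding Solr::Response::Xml malformed XML, or Solr::Response::Ruby an un-evaluable string, surfaces as Solr::Exception rather than a raw parser error. A small sketch of defensive handling built on that contract:

    require 'solr'

    begin
      Solr::Response::Ruby.new(' {...')   # truncated ruby-format payload
    rescue Solr::Exception => e
      puts "bad response rejected: #{e}"
    end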
- -require 'test/unit' -require 'solr' - -# TODO: Maybe replace this with flexmock -class SolrMockBaseTestCase < Test::Unit::TestCase - include Solr - - def setup - Connection.send(:alias_method, :orig_post, :post) - end - - def teardown - Connection.send(:alias_method, :post, :orig_post) - end - - def set_post_return(value) - Connection.class_eval %{ - def post(request) - %q{#{value}} - end - } - end - - def test_dummy - # So Test::Unit is happy running this class - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/spellchecker_request_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/spellchecker_request_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/spellchecker_request_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/spellchecker_request_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' - -class SpellcheckRequestTest < Test::Unit::TestCase - def test_spellcheck_request - request = Solr::Request::Spellcheck.new(:query => 'whateva', :suggestion_count => 5, :accuracy => 0.7, :only_more_popular => true) - assert_equal :ruby, request.response_format - assert_equal 'select', request.handler - hash = request.to_hash - assert_equal 'whateva', hash[:q] - assert_equal 5, hash[:suggestionCount] - assert_equal 0.7, hash[:accuracy] - assert_equal true, hash[:onlyMorePopular] - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/spellcheck_response_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/spellcheck_response_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/spellcheck_response_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/spellcheck_response_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr_mock_base' - -class SpellcheckResponseTest < SolrMockBaseTestCase - def test_basic - ruby_code = "{'responseHeader'=>{'status'=>0,'QTime'=>5},'suggestions'=>['whately','whatcha','whatever']}" - conn = Solr::Connection.new 'http://localhost:9999' - set_post_return(ruby_code) - response = conn.send(Solr::Request::Spellcheck.new(:query => 'whateva')) - assert_equal true, response.ok? 
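SolrMockBaseTestCase above stubs the wire by aliasing Connection#post away and class_eval-ing a replacement that returns a canned string; teardown aliases the original back. The same trick outside Test::Unit, useful for exercising response parsing offline:

    require 'solr'

    Solr::Connection.class_eval do
      alias_method :orig_post, :post
      def post(request)
        "{'responseHeader' => {}, 'response' => {}}"   # canned ruby-format reply
      end
    end

    conn = Solr::Connection.new('http://localhost:9999/solr')
    puts conn.send(Solr::Request::Standard.new(:query => 'anything')).data.inspect

    Solr::Connection.class_eval { alias_method :post, :orig_post }   # restore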
- assert_equal 3, response.suggestions.size - assert_equal ['whately','whatcha','whatever'], response.suggestions - end -end - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/standard_request_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/standard_request_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/standard_request_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/standard_request_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,324 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'test/unit' -require 'solr' - -class StandardRequestTest < Test::Unit::TestCase - - def test_basic_query - request = Solr::Request::Standard.new(:query => 'query') - assert_equal :ruby, request.response_format - assert_equal 'select', request.handler - assert_equal 'query', request.to_hash[:q] - assert_match /q=query/, request.to_s - end - - def test_bad_params - assert_raise(RuntimeError) do - Solr::Request::Standard.new(:foo => "invalid") - end - - assert_raise(RuntimeError) do - Solr::Request::Standard.new(:query => "valid", :foo => "invalid") - end - - assert_raise(RuntimeError) do - Solr::Request::Standard.new(:query => "valid", :operator => :bogus) - end - end - - def test_common_params - request = Solr::Request::Standard.new(:query => 'query', :start => 10, :rows => 50, - :filter_queries => ['fq1', 'fq2'], :field_list => ['id','title','score'], :operator => :and) - assert_equal 10, request.to_hash[:start] - assert_equal 50, request.to_hash[:rows] - assert_equal ['fq1','fq2'], request.to_hash[:fq] - assert_equal "id,title,score", request.to_hash[:fl] - assert_equal "AND", request.to_hash["q.op"] - end - - def test_missing_params - request = Solr::Request::Standard.new(:query => 'query', :debug_query => false, :facets => {:fields =>[:category_facet]}) - assert_nil request.to_hash[:rows] - assert_no_match /rows/, request.to_s - assert_no_match /facet\.sort/, request.to_s - assert_match /debugQuery/, request.to_s - end - - def test_only_facet_query - request = Solr::Request::Standard.new(:query => 'query', - :facets => { - :queries => ["q1", "q2"], - } - ) - - hash = request.to_hash - assert_equal ["q1", "q2"], hash["facet.query"] - end - - def test_facet_params_all - request = Solr::Request::Standard.new(:query => 'query', - :facets => { - :fields => [:genre, - # field that overrides the global facet parameters - {:year => {:limit => 50, :mincount => 0, :missing => false, :sort => :term, :prefix=>"199", :offset => 7}}], - :queries => ["q1", "q2"], - :prefix => "cat", - :offset => 3, :limit => 5, :zeros => true, :mincount => 20, :sort => :count # global facet parameters - } - ) - - hash = request.to_hash - assert_equal true, hash[:facet] - assert_equal [:genre, :year], hash["facet.field"] - assert_equal ["q1", "q2"], hash["facet.query"] - assert_equal 5, hash["facet.limit"] - assert_equal 20, hash["facet.mincount"] - assert_equal true, hash["facet.sort"] - assert_equal "cat", hash["facet.prefix"] - 
assert_equal 50, hash["f.year.facet.limit"] - assert_equal 0, hash["f.year.facet.mincount"] - assert_equal false, hash["f.year.facet.sort"] - assert_equal "199", hash["f.year.facet.prefix"] - assert_equal 3, hash["facet.offset"] - assert_equal 7, hash["f.year.facet.offset"] - end - - def test_basic_sort - request = Solr::Request::Standard.new(:query => 'query', :sort => [{:title => :descending}, {:date => :ascending}]) - assert_equal 'query', request.to_hash[:q] - assert_equal 'title desc,date asc', request.to_hash[:sort] - end - - def test_highlighting - request = Solr::Request::Standard.new(:query => 'query', - :highlighting => { - :field_list => ['title', 'author'], - :merge_contiguous => true, - :increment => 100, - :max_snippets => 3, - :require_field_match => true, - :prefix => "", - :suffix => "", - :fragment_size => 300, - :max_analyzed_chars => 102400, - :formatter => 'myFormatter', - :fragmenter => 'myFragmenter', - :use_phrase_highlighter => true - } - ) - - hash = request.to_hash - assert_equal true, hash[:hl] - assert_equal "title,author", hash["hl.fl"] - assert_equal true, hash["hl.mergeContiguous"] - assert_equal 100, hash["hl.increment"] - assert_equal 3, hash["hl.snippets"] - assert_equal true, hash["hl.requireFieldMatch"] - assert_equal "", hash["hl.simple.pre"] - assert_equal "", hash["hl.simple.post"] - assert_equal 300, hash["hl.fragsize"] - assert_equal 102400, hash["hl.maxAnalyzedChars"] - assert_equal "myFormatter", hash["hl.formatter"] - assert_equal "myFragmenter", hash["hl.fragmenter"] - assert_equal true, hash["hl.usePhraseHighlighter"] - end - - def test_highlighting2 - request = Solr::Request::Standard.new(:query => 'query', - :highlighting => { - :field_list => ['title', 'author'], - :merge_contiguous => { - :default=>false, :fields=>{'author'=>true} - }, - :increment => { - :default=>100, :fields=>{'author'=>200} - }, - :max_snippets => { - :default=>2,:fields=>{'author'=>3} - }, - :prefix => { - :default=>"", :fields=>{'author'=>""}, - }, - :suffix => { - :default=>"", :fields=>{'author'=>""}, - }, - :fragment_size => { - :default=>300,:fields=>{'author'=>200} - }, - :max_analyzed_chars => { - :default=>102400,:fields=>{'author'=>51200} - }, - :require_field_match => { - :default=>false, :fields=>{'author'=>true} - }, - :formatter => { - :default=>'defaultFormatter', :fields=>{'title'=>'titleFormatter'} - }, - :fragmenter => { - :default=>'defaultFragmenter',:fields=>{'title'=>'titleFragmenter'} - }, - } - ) - - hash = request.to_hash - assert_equal true, hash[:hl] - assert_equal "title,author", hash["hl.fl"] - assert_equal false, hash["hl.mergeContiguous"] - assert_equal true, hash["f.author.hl.mergeContiguous"] - assert_equal 100, hash["hl.increment"] - assert_equal 200, hash["f.author.hl.increment"] - assert_equal 2, hash["hl.snippets"] - assert_equal 3, hash["f.author.hl.snippets"] - assert_equal "", hash["hl.simple.pre"] - assert_equal "", hash["f.author.hl.simple.pre"] - assert_equal "", hash["hl.simple.post"] - assert_equal "", hash["f.author.hl.simple.post"] - assert_equal 300, hash["hl.fragsize"] - assert_equal 200, hash["f.author.hl.fragsize"] - assert_equal 102400, hash["hl.maxAnalyzedChars"] - assert_equal 51200, hash["f.author.hl.maxAnalyzedChars"] - assert_equal false, hash["hl.requireFieldMatch"] - assert_equal true, hash["f.author.hl.requireFieldMatch"] - assert_equal 'defaultFormatter', hash["hl.formatter"] - assert_equal 'titleFormatter', hash["f.title.hl.formatter"] - assert_equal 'defaultFragmenter', hash["hl.fragmenter"] - assert_equal 
'titleFragmenter', hash["f.title.hl.fragmenter"] - end - - def test_highlighting_regex - request = Solr::Request::Standard.new(:query => 'query', - :highlighting => { - :field_list => ['title', 'author'], - :regex => { - :slop => 0.8, - :pattern => '\w', - :max_analyzed_chars => 10000 - } - } - ) - - hash = request.to_hash - assert_equal true, hash[:hl] - assert_equal "title,author", hash["hl.fl"] - assert_equal 0.8, hash["hl.regex.slop"] - assert_equal '\w', hash["hl.regex.pattern"] - assert_equal 10000, hash["hl.regex.maxAnalyzedChars"] - end - - def test_highlighting_regex2 - request = Solr::Request::Standard.new(:query => 'query', - :highlighting => { - :field_list => ['title', 'author'], - :regex => { - :slop => { :default=>0.5, :fields=>{'author'=>0.8} }, - :pattern => { :default=>'\w', :fields=>{'author'=>'\n'} }, - :max_analyzed_chars => { :default=>10000, :fields=>{'author'=>20000} } - } - } - ) - - hash = request.to_hash - assert_equal true, hash[:hl] - assert_equal "title,author", hash["hl.fl"] - assert_equal 0.5, hash["hl.regex.slop"] - assert_equal 0.8, hash["f.author.hl.regex.slop"] - assert_equal '\w', hash["hl.regex.pattern"] - assert_equal '\n', hash["f.author.hl.regex.pattern"] - assert_equal 10000, hash["hl.regex.maxAnalyzedChars"] - assert_equal 20000, hash["f.author.hl.regex.maxAnalyzedChars"] - end - - def test_highlighting_alternate_field - request = Solr::Request::Standard.new(:query => 'query', - :highlighting => { - :field_list => ['title', 'author'], - :alternate_field => 'title', - :max_alternate_field_length => 30 - } - ) - - hash = request.to_hash - assert_equal true, hash[:hl] - assert_equal "title,author", hash["hl.fl"] - assert_equal "title", hash["hl.alternateField"] - assert_equal 30, hash["hl.maxAlternateFieldLength"] - end - - def test_highlighting_alternate_field2 - request = Solr::Request::Standard.new(:query => 'query', - :highlighting => { - :field_list => ['title', 'author'], - :alternate_field => { - :default=>'default', :fields=>{'title'=>'title', 'author'=>'author'} - }, - :max_alternate_field_length => { - :default=>10, :fields=>{'title'=>30, 'author'=>20} - } - } - ) - - hash = request.to_hash - assert_equal true, hash[:hl] - assert_equal "title,author", hash["hl.fl"] - assert_equal "default", hash["hl.alternateField"] - assert_equal "title", hash["f.title.hl.alternateField"] - assert_equal "author", hash["f.author.hl.alternateField"] - assert_equal 10, hash["hl.maxAlternateFieldLength"] - assert_equal 30, hash["f.title.hl.maxAlternateFieldLength"] - assert_equal 20, hash["f.author.hl.maxAlternateFieldLength"] - end - - def test_highlighting_alternate_field_old_style - request = Solr::Request::Standard.new(:query => 'query', - :highlighting => { - :field_list => ['title', 'author'], - :alternate_fields => {'title'=>'title', 'author'=>'author'}, - :max_alternate_field_length => {'title'=>30, 'author'=>20} - } - ) - - hash = request.to_hash - assert_equal true, hash[:hl] - assert_equal "title,author", hash["hl.fl"] - assert_equal "title", hash["f.title.hl.alternateField"] - assert_equal "author", hash["f.author.hl.alternateField"] - assert_equal 30, hash["f.title.hl.maxAlternateFieldLength"] - assert_equal 20, hash["f.author.hl.maxAlternateFieldLength"] - end - - def test_mlt - request = Solr::Request::Standard.new(:query => 'query', - :mlt => { - :count => 5, :field_list => ['field1', 'field2'], - :min_term_freq => 3, :min_doc_freq => 10, - :min_word_length => 4, :max_word_length => 17, - :max_query_terms => 20, :max_tokens_parsed => 100, - 
:boost => true - } - ) - - hash = request.to_hash - assert_equal true, hash[:mlt] - assert_equal 5, hash["mlt.count"] - assert_equal 'field1,field2', hash["mlt.fl"] - assert_equal 3, hash["mlt.mintf"] - assert_equal 10, hash["mlt.mindf"] - assert_equal 4, hash["mlt.minwl"] - assert_equal 17, hash["mlt.maxwl"] - assert_equal 20, hash["mlt.maxqt"] - assert_equal 100, hash["mlt.maxntp"] - assert_equal true, hash["mlt.boost"] - end - -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/standard_response_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/standard_response_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/standard_response_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/standard_response_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,174 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr_mock_base' - -class StandardResponseTest < SolrMockBaseTestCase - - def test_basic - ruby_code = -<<RUBY_CODE -{'responseHeader'=>{ - 'status'=>0, - 'QTime'=>1, - 'params'=>{ - 'wt'=>'ruby', - 'rows'=>'10', - 'explainOther'=>'', - 'start'=>'0', - 'hl.fl'=>'', - 'indent'=>'on', - 'q'=>'guido', - 'fl'=>'*,score', - 'qt'=>'standard', - 'version'=>'2.2'}}, - 'response'=>{'numFound'=>1,'start'=>0,'maxScore'=>0.67833745,'docs'=>[ - { - 'name'=>'guido von rossum', - 'id'=>'123', - 'timestamp'=>'2007-01-16T09:55:30.589Z', - 'score'=>0.67833745}] - }} -RUBY_CODE - conn = Solr::Connection.new 'http://localhost:9999' - set_post_return(ruby_code) - response = conn.send(Solr::Request::Standard.new(:query => 'foo')) - assert_equal true, response.ok?
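The Standard request tests above (common params, facets, highlighting, MLT) all reduce to one mechanism: the option tree is flattened into Solr's wire parameters, with per-field overrides rendered as f.<field>.<param>. A sketch showing the mapping end to end:

    require 'solr'

    request = Solr::Request::Standard.new(
      :query          => 'ruby',
      :rows           => 20,
      :filter_queries => ['type:book'],
      :facets         => {:fields => [:genre], :limit => 10},
      :highlighting   => {:field_list => ['title']},
      :mlt            => {:count => 3, :field_list => ['title'],
                          :min_term_freq => 2, :min_doc_freq => 5}
    )

    hash = request.to_hash
    puts hash[:rows]                   # => 20
    puts hash['facet.field'].inspect   # => [:genre]
    puts hash['hl.fl']                 # => "title"
    puts hash['mlt.mintf']             # => 2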
- assert response.query_time - assert_equal 1, response.total_hits - assert_equal 0, response.start - assert_equal 0.67833745, response.max_score - assert_equal 1, response.hits.length - end - - def test_iteration - ruby_code = -<<RUBY_CODE -{'responseHeader'=>{ - 'status'=>0, - 'QTime'=>0, - 'params'=>{ - 'wt'=>'ruby', - 'rows'=>'10', - 'explainOther'=>'', - 'start'=>'0', - 'hl.fl'=>'', - 'indent'=>'on', - 'q'=>'guido', - 'fl'=>'*,score', - 'qt'=>'standard', - 'version'=>'2.2'}}, - 'response'=>{'numFound'=>22,'start'=>0,'maxScore'=>0.53799295,'docs'=>[ - { - 'name'=>'guido von rossum the 0', - 'id'=>'0', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 1', - 'id'=>'1', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 2', - 'id'=>'2', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 3', - 'id'=>'3', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 4', - 'id'=>'4', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 5', - 'id'=>'5', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 6', - 'id'=>'6', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 7', - 'id'=>'7', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 8', - 'id'=>'8', - 'score'=>0.53799295}, - { - 'name'=>'guido von rossum the 9', - 'id'=>'9', - 'score'=>0.53799295}] - }} -RUBY_CODE - conn = Solr::Connection.new 'http://localhost:9999' - set_post_return(ruby_code) - - count = 0 - conn.query('foo') do |hit| - assert_equal "guido von rossum the #{count}", hit['name'] - count += 1 - end - - assert_equal 10, count - end - - def test_facets - ruby_code = - <<RUBY_CODE -{'responseHeader'=>{ - 'status'=>0, - 'QTime'=>1897, - 'params'=>{ - 'facet.limit'=>'20', - 'wt'=>'ruby', - 'rows'=>'0', - 'facet'=>'true', - 'facet.mincount'=>'1', - 'facet.field'=>[ - 'subject_genre_facet', - 'subject_geographic_facet', - 'subject_format_facet', - 'subject_era_facet', - 'subject_topic_facet'], - 'indent'=>'true', - 'fl'=>'*,score', - 'q'=>'[* TO *]', - 'qt'=>'standard', - 'facet.sort'=>'true'}}, - 'response'=>{'numFound'=>49999,'start'=>0,'maxScore'=>1.0,'docs'=>[] - }, - 'facet_counts'=>{ - 'facet_queries'=>{}, - 'facet_fields'=>{ - 'subject_genre_facet'=>[ - 'Biography.',2605, - 'Congresses.',1837, - 'Bibliography.',672, - 'Exhibitions.',642, - 'Periodicals.',615, - 'Sources.',485]}} - } -RUBY_CODE - set_post_return(ruby_code) - conn = Solr::Connection.new "http://localhost:9999" - response = conn.query('foo') - facets = response.field_facets('subject_genre_facet') - assert_equal 2605, facets[0].value - assert_equal 485, facets[5].value - end - -end - diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/suite.rb solr-3.3/client/ruby/solr-ruby/test/unit/suite.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/suite.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/suite.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
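The iteration and facet tests above show the consumer side: Connection#query yields each hit hash to a block, and the response object exposes totals, scores, and per-field facet counts. A sketch against a live server (URL and facet field hypothetical, and assuming Connection#query forwards its options hash to the Standard request as the tests above imply):

    require 'solr'

    conn = Solr::Connection.new('http://localhost:8983/solr')

    response = conn.query('guido') do |hit|
      puts hit['name']   # each hit is a plain field=>value hash
    end

    response = conn.query('[* TO *]',
                          :facets => {:fields => ['subject_genre_facet'], :mincount => 1})
    puts response.total_hits
    response.field_facets('subject_genre_facet').each do |facet|
      puts facet.value   # counts, as asserted in test_facets above
    end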
- -# dynamically require all tests files -Dir.glob("*_test.rb").each do | file | - require file -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/tab_delimited.txt solr-3.3/client/ruby/solr-ruby/test/unit/tab_delimited.txt --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/tab_delimited.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/tab_delimited.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -medium associatedURL boxHeightInInches boxLengthInInches boxWeightInPounds boxWidthInInches scannednumber upc asin country title fullTitle series numberInSeries edition aspect mediacount genre price currentValue language netrating description owner publisher published rare purchaseDate rating used signed hasExperienced notes location paid condition notowned author illustrator pages -book 9780865681743 0865681740 us Xing Yi Nei Gong: Xing Yi Health Maintenance and Internal Strength Development Xing Yi Nei Gong: Xing Yi Health Maintenance and Internal Strength Development Paperback $21.95 $14.05 4.5 This is the most complete book on the art of xing yi (hsing Yi) available. It includes the complete xing yi history and lineage going back eight generations; manuscripts handed down from famous practitioners Dai Long Bang and Li Neng Ran; 16 health maintenance and power development exercises; qigong (chi kung) exerices; xing yi long spear power training exercises; and more. Unique Publications 1998-02-10 12:00:00 +0000 2007-02-03 02:22:25 -0500 Dan Miller/ Tim Cartmell 200 diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/util_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/util_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/util_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/util_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require 'solr' -require 'test/unit' - -class UtilTest < Test::Unit::TestCase - def test_paired_array_to_hash - assert_equal({:key1 => :value1, :key2 => :value2}, Solr::Util.paired_array_to_hash([:key1, :value1, :key2, :value2])) - end - - def test_query_parser_escape - assert_equal %q(http\:\/\/lucene\.apache\.org\/solr), Solr::Util.query_parser_escape("http://lucene.apache.org/solr") - end -end diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/xpath_mapper_test.rb solr-3.3/client/ruby/solr-ruby/test/unit/xpath_mapper_test.rb --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/xpath_mapper_test.rb 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/xpath_mapper_test.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
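Solr::Util above covers two helpers worth knowing: paired_array_to_hash converts the flattened [key, value, key, value] arrays Solr returns (exactly the shape of 'subject_genre_facet' in test_facets above) into a Hash, and query_parser_escape backslash-escapes Lucene query-parser metacharacters. For example:

    require 'solr'

    pairs = ['Biography.', 2605, 'Congresses.', 1837]
    puts Solr::Util.paired_array_to_hash(pairs).inspect
    # => {"Biography."=>2605, "Congresses."=>1837}

    puts Solr::Util.query_parser_escape('http://lucene.apache.org/solr')
    # => http\:\/\/lucene\.apache\.org\/solr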
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -begin - require 'solr' - require 'test/unit' - require 'xml/libxml' - - class XPathMapperTest < Test::Unit::TestCase - - def setup - @doc = XML::Document.file(File.expand_path(File.dirname(__FILE__)) + "/xpath_test_file.xml") - end - - def test_simple_xpath - mapping = {:solr_field1 => :'/root/parent/child', - :solr_field2 => :'/root/parent/child/@attribute'} - - mapper = Solr::Importer::XPathMapper.new(mapping) - mapped_data = mapper.map(@doc) - - assert_equal ['text1', 'text2'], mapped_data[:solr_field1] - assert_equal ['attribute1', 'attribute2'], mapped_data[:solr_field2] - end - - end -rescue LoadError => e - puts "XPathMapperTest not run because #{e}" -end \ No newline at end of file diff -Nru solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/xpath_test_file.xml solr-3.3/client/ruby/solr-ruby/test/unit/xpath_test_file.xml --- solr-1.4.0+ds1/client/ruby/solr-ruby/test/unit/xpath_test_file.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/client/ruby/solr-ruby/test/unit/xpath_test_file.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -<?xml version='1.0' encoding='UTF-8'?> -<!-- ASF license header comment (markup lost in extraction) --> -<root> - <parent> - <child attribute="attribute1">text1</child> - <child attribute="attribute2">text2</child> - </parent> -</root> \ No newline at end of file diff -Nru solr-1.4.0+ds1/common-build.xml solr-3.3/common-build.xml --- solr-1.4.0+ds1/common-build.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/common-build.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,426 +0,0 @@ -[426 lines of Ant build XML whose markup was lost in extraction; only these text fragments survive:] - This file is designed for importing into a main build file, and not intended - for standalone use. - ${@{file}.sum} - ${@{file}.base} - Signing @{input.file} Sig File: @{output.file} - ################################################################## - Maven ant tasks not found. - Please make sure the maven-ant-tasks jar is in ANT_HOME/lib, or made - available to Ant using other mechanisms like -lib or CLASSPATH. - ##################################################################
diff -Nru solr-1.4.0+ds1/contrib/clustering/build.xml solr-3.3/contrib/clustering/build.xml --- solr-1.4.0+ds1/contrib/clustering/build.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/build.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,182 +0,0 @@ -[182 lines of Ant build XML whose markup was lost in extraction; only these text fragments survive:] - Clustering Integration - Tests failed! diff -Nru solr-1.4.0+ds1/contrib/clustering/CHANGES.txt solr-3.3/contrib/clustering/CHANGES.txt --- solr-1.4.0+ds1/contrib/clustering/CHANGES.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/CHANGES.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -Apache Solr Clustering Implementation - -Intro: - -See http://wiki.apache.org/solr/ClusteringComponent - -CHANGES - -$Id:$ - -================== Release 1.4.0 ================== - -Solr Clustering will be released for the first time in Solr 1.4. See http://wiki.apache.org/solr/ClusteringComponent - for details on usage. \ No newline at end of file diff -Nru solr-1.4.0+ds1/contrib/clustering/README.txt solr-3.3/contrib/clustering/README.txt --- solr-1.4.0+ds1/contrib/clustering/README.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/README.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -The Clustering contrib plugin for Solr provides a generic mechanism for plugging in third-party clustering implementations. -It currently provides clustering support for search results using the Carrot2 project. - -See http://wiki.apache.org/solr/ClusteringComponent for how to get started. - -Also note that some of the Carrot2 libraries cannot be distributed in binary form because they are LGPL. Thus, you will have -to download those components. See the build.xml file located in this directory for the location of the libraries. -The libraries you will need are: nni.jar, Colt, PNJ and simple-xml.
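The README above describes the clustering component purely in terms of server-side configuration. From the solr-ruby client that this diff also removes, a clustering-enabled handler is reached like any other query type; in the sketch below the handler name 'clustering' and the 'clusters' response key are assumptions for illustration, not names fixed by solr-ruby or by this diff:

    require 'solr'

    conn = Solr::Connection.new('http://localhost:8983/solr')

    # 'clustering' must match a request handler registered in solrconfig.xml
    # with the ClusteringComponent in its chain (assumed name).
    request = Solr::Request::Select.new('clustering', :q => 'search terms')
    response = conn.send(request)
    puts response.data['clusters'].inspect   # response key name assumed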
diff -Nru solr-1.4.0+ds1/contrib/clustering/solr-clustering-pom.xml.template solr-3.3/contrib/clustering/solr-clustering-pom.xml.template --- solr-1.4.0+ds1/contrib/clustering/solr-clustering-pom.xml.template 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/solr-clustering-pom.xml.template 1970-01-01 00:00:00.000000000 +0000 @@ -1,63 +0,0 @@ - - - - - 4.0.0 - - - org.apache.solr - solr-parent - @maven_version@ - - - org.apache.solr - solr-clustering - Apache Solr Clustering - @maven_version@ - Apache Solr Clustering - jar - - - org.apache.solr - solr-solrj - @maven_version@ - - - org.apache.solr - solr-core - @maven_version@ - - - - org.carrot2 - carrot2-mini - 3.1.0 - - - - - carrot2.org - Carrot2 Maven2 repository - http://download.carrot2.org/maven2/ - - - diff -Nru solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java --- solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,244 +0,0 @@ -package org.apache.solr.handler.clustering.carrot2; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.*; - -import org.apache.commons.lang.StringUtils; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.SetBasedFieldSelector; -import org.apache.lucene.search.Query; -import org.apache.solr.common.params.HighlightParams; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.SimpleOrderedMap; -import org.apache.solr.common.SolrException; -import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.clustering.SearchClusteringEngine; -import org.apache.solr.highlight.SolrHighlighter; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.search.*; -import org.apache.solr.util.RefCounted; -import org.carrot2.core.*; -import org.carrot2.core.attribute.AttributeNames; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Sets; - -/** - * Search results clustering engine based on Carrot2 clustering algorithms. - *

- * Output from this class is subject to change.
- *
- * @see http://project.carrot2.org
- */
-@SuppressWarnings("unchecked")
-public class CarrotClusteringEngine extends SearchClusteringEngine {
-  private transient static Logger log = LoggerFactory
-          .getLogger(CarrotClusteringEngine.class);
-
-  /**
-   * Carrot2 controller that manages instances of clustering algorithms
-   */
-  private CachingController controller = new CachingController();
-  private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
-
-  private String idFieldName;
-
-  public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
-    try {
-      // Prepare attributes for the Carrot2 clustering call
-      Map<String, Object> attributes = new HashMap<String, Object>();
-      List<Document> documents = getDocuments(docList, query, sreq);
-      attributes.put(AttributeNames.DOCUMENTS, documents);
-      attributes.put(AttributeNames.QUERY, query.toString());
-
-      // Pass extra overriding attributes from the request, if any
-      extractCarrotAttributes(sreq.getParams(), attributes);
-
-      // Perform clustering and convert to a named list
-      return clustersToNamedList(controller.process(attributes,
-              clusteringAlgorithmClass).getClusters(), sreq.getParams());
-    } catch (Exception e) {
-      log.error("Carrot2 clustering failed", e);
-      throw new RuntimeException(e);
-    }
-  }
-
-  @Override
-  public String init(NamedList config, final SolrCore core) {
-    String result = super.init(config, core);
-    SolrParams initParams = SolrParams.toSolrParams(config);
-
-    // Initialize the Carrot2 controller. Pass initialization attributes, if any.
-    HashMap<String, Object> initAttributes = new HashMap<String, Object>();
-    extractCarrotAttributes(initParams, initAttributes);
-    this.controller.init(initAttributes);
-
-    this.idFieldName = core.getSchema().getUniqueKeyField().getName();
-
-    // Make sure the requested Carrot2 clustering algorithm class is available
-    String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
-    Class<?> algorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName);
-    if (!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) {
-      throw new IllegalArgumentException("Class provided as "
-              + CarrotParams.ALGORITHM + " must implement "
-              + IClusteringAlgorithm.class.getName());
-    }
-    this.clusteringAlgorithmClass = (Class<? extends IClusteringAlgorithm>) algorithmClass;
-
-    return result;
-  }
-
-  /**
-   * Prepares Carrot2 documents for clustering.
-   */
-  private List<Document> getDocuments(DocList docList,
-                                      Query query, final SolrQueryRequest sreq) throws IOException {
-    SolrHighlighter highlighter = null;
-    SolrParams solrParams = sreq.getParams();
-    SolrCore core = sreq.getCore();
-
-    // Names of fields to deliver content for clustering
-    String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
-    String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
-    String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
-            titleField);
-    if (StringUtils.isBlank(snippetField)) {
-      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME
-              + " must not be blank.");
-    }
-    Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
-            snippetField, idFieldName);
-
-    // Get the documents
-    DocIterator docsIter = docList.iterator();
-    boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
-            false);
-
-    SolrQueryRequest req = null;
-    String[] snippetFieldAry = null;
-    if (produceSummary == true) {
-      highlighter = core.getHighlighter();
-      Map<String, Object> args = new HashMap<String, Object>();
-      snippetFieldAry = new String[]{snippetField};
-      args.put(HighlightParams.FIELDS, snippetFieldAry);
-      args.put(HighlightParams.HIGHLIGHT, "true");
-      req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
-        @Override
-        public SolrIndexSearcher getSearcher() {
-          return sreq.getSearcher();
-        }
-      };
-    }
-
-    SolrIndexSearcher searcher = sreq.getSearcher();
-    List<Document> result = new ArrayList<Document>(docList.size());
-    FieldSelector fieldSelector = new SetBasedFieldSelector(fieldsToLoad,
-            Collections.emptySet());
-    float[] scores = {1.0f};
-    int[] docsHolder = new int[1];
-    Query theQuery = query;
-
-    while (docsIter.hasNext()) {
-      Integer id = docsIter.next();
-      org.apache.lucene.document.Document doc = searcher.doc(id,
-              fieldSelector);
-      String snippet = getValue(doc, snippetField);
-      if (produceSummary == true) {
-        docsHolder[0] = id.intValue();
-        DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
-        highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
-      }
-      Document carrotDocument = new Document(getValue(doc, titleField),
-              snippet, doc.get(urlField));
-      carrotDocument.addField("solrId", doc.get(idFieldName));
-      result.add(carrotDocument);
-    }
-
-    return result;
-  }
-
-  protected String getValue(org.apache.lucene.document.Document doc,
-                            String field) {
-    StringBuilder result = new StringBuilder();
-    String[] vals = doc.getValues(field);
-    for (int i = 0; i < vals.length; i++) {
-      // Join multiple values with a period so that Carrot2 does not pick up
-      // phrases that cross field-value boundaries (in most cases it would
-      // create useless phrases).
-      result.append(vals[i]).append(" . ");
-    }
-    return result.toString().trim();
-  }
-
-  private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
-                                                      SolrParams solrParams) {
-    List<NamedList<Object>> result = new ArrayList<NamedList<Object>>();
-    clustersToNamedList(carrotClusters, result, solrParams.getBool(
-            CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt(
-            CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
-    return result;
-  }
-
-  private void clustersToNamedList(List<Cluster> outputClusters,
-                                   List<NamedList<Object>> parent, boolean outputSubClusters, int maxLabels) {
-    for (Cluster outCluster : outputClusters) {
-      NamedList<Object> cluster = new SimpleOrderedMap<Object>();
-      parent.add(cluster);
-
-      List<String> labels = outCluster.getPhrases();
-      if (labels.size() > maxLabels)
-        labels = labels.subList(0, maxLabels);
-      cluster.add("labels", labels);
-
-      List<Document> docs = outputSubClusters ?
-              outCluster.getDocuments() : outCluster.getAllDocuments();
-      List<Object> docList = new ArrayList<Object>();
-      cluster.add("docs", docList);
-      for (Document doc : docs) {
-        docList.add(doc.getField("solrId"));
-      }
-
-      if (outputSubClusters) {
-        List<NamedList<Object>> subclusters = new ArrayList<NamedList<Object>>();
-        cluster.add("clusters", subclusters);
-        clustersToNamedList(outCluster.getSubclusters(), subclusters,
-                outputSubClusters, maxLabels);
-      }
-    }
-  }
-
-  /**
-   * Extracts parameters that can possibly match some attributes of Carrot2 algorithms.
-   */
-  private void extractCarrotAttributes(SolrParams solrParams,
-                                       Map<String, Object> attributes) {
-    // Extract all non-predefined parameters. This way, we'll be able to set all
-    // parameters of Carrot2 algorithms without defining their names as constants.
-    for (Iterator<String> paramNames = solrParams.getParameterNamesIterator(); paramNames
-            .hasNext();) {
-      String paramName = paramNames.next();
-      if (!CarrotParams.CARROT_PARAM_NAMES.contains(paramName)) {
-        attributes.put(paramName, solrParams.get(paramName));
-      }
-    }
-  }
-}
diff -Nru solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
--- solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java 2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java 1970-01-01 00:00:00.000000000 +0000
@@ -1,40 +0,0 @@
-package org.apache.solr.handler.clustering.carrot2;
-
-import java.util.Set;
-
-import com.google.common.collect.ImmutableSet;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-public interface CarrotParams {
-
-  String CARROT_PREFIX = "carrot.";
-
-  String ALGORITHM = CARROT_PREFIX + "algorithm";
-  String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
-  String URL_FIELD_NAME = CARROT_PREFIX + "url";
-  String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
-  String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
-  String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
-  String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
-
-  public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
-          ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
-          PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
-}
diff -Nru solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java
--- solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java 2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java 1970-01-01 00:00:00.000000000 +0000
@@ -1,190 +0,0 @@
-package org.apache.solr.handler.clustering;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrResourceLoader;
-import org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine;
-import org.apache.solr.handler.component.ResponseBuilder;
-import org.apache.solr.handler.component.SearchComponent;
-import org.apache.solr.search.DocListAndSet;
-import org.apache.solr.util.plugin.SolrCoreAware;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-
-/**
- * Provide a plugin for clustering results. Can either be for search results (i.e. via Carrot2) or for
- * clustering documents (i.e. via Mahout).
- *

- * This engine is experimental. Output from this engine is subject to change in future releases.
- *
- */
-public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
-  private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class);
-
-  private Map<String, SearchClusteringEngine> searchClusteringEngines = new HashMap<String, SearchClusteringEngine>();
-  private Map<String, DocumentClusteringEngine> documentClusteringEngines = new HashMap<String, DocumentClusteringEngine>();
-  /**
-   * Base name for all clustering query parameters. This name is also used to
-   * register this component with SearchHandler.
-   */
-  public static final String COMPONENT_NAME = "clustering";
-  private NamedList initParams;
-
-
-  public void prepare(ResponseBuilder rb) throws IOException {
-    SolrParams params = rb.req.getParams();
-    if (!params.getBool(COMPONENT_NAME, false)) {
-      return;
-    }
-  }
-
-  public void process(ResponseBuilder rb) throws IOException {
-    SolrParams params = rb.req.getParams();
-    if (!params.getBool(COMPONENT_NAME, false)) {
-      return;
-    }
-    String name = params.get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
-    boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
-    if (useResults == true) {
-      SearchClusteringEngine engine = searchClusteringEngines.get(name);
-      if (engine != null) {
-        DocListAndSet results = rb.getResults();
-        Object clusters = engine.cluster(rb.getQuery(), results.docList, rb.req);
-        rb.rsp.add("clusters", clusters);
-      } else {
-        log.warn("No engine for: " + name);
-      }
-    }
-    boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
-    if (useCollection == true) {
-      DocumentClusteringEngine engine = documentClusteringEngines.get(name);
-      if (engine != null) {
-        boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
-        NamedList nl = null;
-
-        //TODO: This likely needs to be made into a background task that runs in an executor
-        if (useDocSet == true) {
-          nl = engine.cluster(rb.getResults().docSet, params);
-        } else {
-          nl = engine.cluster(params);
-        }
-        rb.rsp.add("clusters", nl);
-      } else {
-        log.warn("No engine for " + name);
-      }
-    }
-  }
-
-  @Override
-  @SuppressWarnings("unchecked")
-  public void init(NamedList args) {
-    super.init(args);
-    this.initParams = args;
-  }
-
-  public void inform(SolrCore core) {
-    if (initParams != null) {
-      log.info("Initializing Clustering Engines");
-      boolean searchHasDefault = false;
-      boolean documentHasDefault = false;
-      for (int i = 0; i < initParams.size(); i++) {
-        if (initParams.getName(i).equals("engine")) {
-          NamedList engineNL = (NamedList) initParams.getVal(i);
-          String className = (String) engineNL.get("classname");
-          if (className == null) {
-            className = CarrotClusteringEngine.class.getName();
-          }
-          SolrResourceLoader loader = core.getResourceLoader();
-          ClusteringEngine clusterer = (ClusteringEngine) loader.newInstance(className);
-          if (clusterer != null) {
-            String name = clusterer.init(engineNL, core);
-            if (name != null) {
-              boolean isDefault = name.equals(ClusteringEngine.DEFAULT_ENGINE_NAME);
-              if (clusterer instanceof SearchClusteringEngine) {
-                if (isDefault == true && searchHasDefault == false) {
-                  searchHasDefault = true;
-                } else if (isDefault == true && searchHasDefault == true) {
-                  throw new RuntimeException("More than one default search clustering engine: " + engineNL);
-                }
-                searchClusteringEngines.put(name, (SearchClusteringEngine) clusterer);
-              } else if (clusterer instanceof DocumentClusteringEngine) {
-                if (isDefault == true && documentHasDefault == false) {
-                  documentHasDefault = true;
-                } else if (isDefault == true && documentHasDefault == true) {
-                  throw new RuntimeException("More than one default document clustering engine: " + engineNL);
-                }
-                documentClusteringEngines.put(name, (DocumentClusteringEngine) clusterer);
-              }
-            } else {
-              if (clusterer instanceof SearchClusteringEngine && searchHasDefault == false) {
-                searchClusteringEngines.put(ClusteringEngine.DEFAULT_ENGINE_NAME, (SearchClusteringEngine) clusterer);
-                searchHasDefault = true;
-              } else if (clusterer instanceof DocumentClusteringEngine && documentHasDefault == false) {
-                documentClusteringEngines.put(ClusteringEngine.DEFAULT_ENGINE_NAME, (DocumentClusteringEngine) clusterer);
-                documentHasDefault = true;
-              } else {
-                throw new RuntimeException("More than one engine is missing name: " + engineNL);
-              }
-            }
-          }
-        }
-      }
-      log.info("Finished Initializing Clustering Engines");
-    }
-  }
-
-  /**
-   * @return Unmodifiable Map of the engines; key is the name from the config, value is the engine
-   */
-  public Map<String, SearchClusteringEngine> getSearchClusteringEngines() {
-    return Collections.unmodifiableMap(searchClusteringEngines);
-  }
-
-  // ///////////////////////////////////////////
-  // / SolrInfoMBean
-  // //////////////////////////////////////////
-
-  @Override
-  public String getDescription() {
-    return "A Clustering component";
-  }
-
-  @Override
-  public String getVersion() {
-    return "$Revision:$";
-  }
-
-  @Override
-  public String getSourceId() {
-    return "$Id:$";
-  }
-
-  @Override
-  public String getSource() {
-    return "$URL:$";
-  }
-
-}
diff -Nru solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringEngine.java solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringEngine.java
--- solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringEngine.java 2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringEngine.java 1970-01-01 00:00:00.000000000 +0000
@@ -1,40 +0,0 @@
-package org.apache.solr.handler.clustering;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ -import org.apache.solr.common.util.NamedList; -import org.apache.solr.core.SolrCore; - - -/** - * - * - **/ -public class ClusteringEngine { - private String name; - public static final String ENGINE_NAME = "name"; - public static final String DEFAULT_ENGINE_NAME = "default"; - - public String init(NamedList config, SolrCore core) { - name = (String) config.get(ENGINE_NAME); - - return name; - } - - public String getName() { - return name; - } -} diff -Nru solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringParams.java solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringParams.java --- solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringParams.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringParams.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -package org.apache.solr.handler.clustering; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/** - * - * - **/ -public interface ClusteringParams { - - public static final String CLUSTERING_PREFIX = "clustering."; - - public static final String ENGINE_NAME = CLUSTERING_PREFIX + "engine"; - - public static final String USE_SEARCH_RESULTS = CLUSTERING_PREFIX + "results"; - - public static final String USE_COLLECTION = CLUSTERING_PREFIX + "collection"; - /** - * When document clustering, cluster on the Doc Set - */ - public static final String USE_DOC_SET = CLUSTERING_PREFIX + "docs.useDocSet"; -} diff -Nru solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java --- solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,54 +0,0 @@ -package org.apache.solr.handler.clustering; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.core.SolrCore; -import org.apache.solr.search.DocList; -import org.apache.solr.search.DocSet; -import org.apache.lucene.search.Query; - - -/** - * Experimental. Subject to change before the next release. - * - **/ -public abstract class DocumentClusteringEngine extends ClusteringEngine { - - /** - * Experimental. Subject to change before the next release - * - * Cluster all the documents in the index. Clustering is often an expensive task that can take a long time. - * @param solrParams The params controlling clustering - * @return The clustering results - */ - public abstract NamedList cluster(SolrParams solrParams); - - /** - * Experimental. Subject to change before the next release - * - * - * Cluster the set of docs. Clustering of documents is often an expensive task that can take a long time. - * @param docs The docs to cluster. If null, cluster all docs as in {@link #cluster(org.apache.solr.common.params.SolrParams)} - * @param solrParams The params controlling the clustering - * @return The results. - */ - public abstract NamedList cluster(DocSet docs, SolrParams solrParams); - - -} diff -Nru solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java --- solr-1.4.0+ds1/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -package org.apache.solr.handler.clustering; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.core.SolrCore; -import org.apache.solr.search.DocList; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.lucene.search.Query; - - -/** - * - * - **/ -public abstract class SearchClusteringEngine extends ClusteringEngine { - - - public abstract Object cluster(Query query, DocList docList, SolrQueryRequest sreq); - - -} diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java --- solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,198 +0,0 @@ -package org.apache.solr.handler.clustering; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.solr.util.AbstractSolrTestCase; - - -/** - * - */ -public class AbstractClusteringTest extends AbstractSolrTestCase { - protected int numberOfDocs = 0; - - @Override - public void setUp() throws Exception { - super.setUp(); - - numberOfDocs = 0; - for (String[] doc : DOCUMENTS) { - assertU("add failed", adoc("id", Integer.toString(numberOfDocs), "url", doc[0], "title", doc[1], "snippet", doc[2])); - numberOfDocs++; - } - assertU("commit", commit()); - } - - public String getSchemaFile() { - return "schema.xml"; - } - - public String getSolrConfigFile() { - return "solrconfig.xml"; - } - - final String[][] DOCUMENTS = new String[][]{ - {"http://en.wikipedia.org/wiki/Data_mining", - "Data Mining - Wikipedia", - "Article about knowledge-discovery in databases (KDD), the practice of automatically searching large stores of data for patterns."}, - - - {"http://en.wikipedia.org/wiki/Datamining", - "Data mining - Wikipedia, the free encyclopedia", - "Data mining is the entire process of applying computer-based methodology, ... Moreover, some data-mining systems such as neural networks are inherently geared ..."}, - - - {"http://www.statsoft.com/textbook/stdatmin.html", - "Electronic Statistics Textbook: Data Mining Techniques", - "Outlines the crucial concepts in data mining, defines the data warehousing process, and offers examples of computational and graphical exploratory data analysis techniques."}, - - - {"http://www.thearling.com/text/dmwhite/dmwhite.htm", - "An Introduction to Data Mining", - "Data mining, the extraction of hidden predictive information from large ... 
Data mining tools predict future trends and behaviors, allowing businesses to ..."}, - - - {"http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm", - "Data Mining: What is Data Mining?", - "Outlines what knowledge discovery, the process of analyzing data from different perspectives and summarizing it into useful information, can do and how it works."}, - - - {"http://www.spss.com/datamine", - "Data Mining Software, Data Mining Applications and Data Mining Solutions", - "The patterns uncovered using data mining help organizations make better and ... data mining customer ... Data mining applications, on the other hand, embed ..."}, - - - {"http://www.kdnuggets.com/", - "KD Nuggets", - "Newsletter on the data mining and knowledge industries, offering information on data mining, knowledge discovery, text mining, and web mining software, courses, jobs, publications, and meetings."}, - - - {"http://www.answers.com/topic/data-mining", - "data mining: Definition from Answers.com", - "data mining n. The automatic extraction of useful, often previously unknown information from large databases or data ... Data Mining For Investing ..."}, - - - {"http://www.statsoft.com/products/dataminer.htm", - "STATISTICA Data Mining and Predictive Modeling Solutions", - "GRC site-wide menuing system research and development. ... Contact a Data Mining Solutions Consultant. News and Success Stories. Events ..."}, - - - {"http://datamining.typepad.com/", - "Data Mining: Text Mining, Visualization and Social Media", - "Commentary on text mining, data mining, social media and data visualization. ... While mining Twitter data for business and marketing intelligence (trend/buzz ..."}, - - - {"http://www.twocrows.com/", - "Two Crows Corporation", - "Dedicated to the development, marketing, sales and support of tools for knowledge discovery to make data mining accessible and easy to use."}, - - - {"http://www.thearling.com/", - "Thearling.com", - "Kurt Thearling's site dedicated to sharing information about data mining, the automated extraction of hidden predictive information from databases, and other analytic technologies."}, - - - {"http://www.ccsu.edu/datamining/", - "CCSU - Data Mining", - "Offers degrees and certificates in data mining. Allows students to explore cutting-edge data mining techniques and applications: market basket analysis, decision trees, neural networks, machine learning, web mining, and data modeling."}, - - - {"http://www.oracle.com/technology/products/bi/odm", - "Oracle Data Mining", - "Oracle Data Mining Product Center ... New Oracle Data Mining Powers New Social CRM Application (more information ... Mining High-Dimensional Data for ..."}, - - - {"http://databases.about.com/od/datamining/a/datamining.htm", - "Data Mining: An Introduction", - "About.com article on how businesses are discovering new trends and patterns of behavior that previously went unnoticed through data mining, automated statistical analysis techniques."}, - - - {"http://www.dmoz.org/Computers/Software/Databases/Data_Mining/", - "Open Directory - Computers: Software: Databases: Data Mining", - "Data Mining and Knowledge Discovery - A peer-reviewed journal publishing ... Data mining creates information assets that an organization can leverage to ..."}, - - - {"http://www.cs.wisc.edu/dmi/", - "DMI:Data Mining Institute", - "Data Mining Institute at UW-Madison ... The Data Mining Institute (DMI) was started on June 1, 1999 at the Computer ... 
of the Data Mining Group of Microsoft ..."}, - - - {"http://www.the-data-mine.com/", - "The Data Mine", - "Provides information about data mining also known as knowledge discovery in databases (KDD) or simply knowledge discovery. List software, events, organizations, and people working in data mining."}, - - - {"http://www.statserv.com/datamining.html", - "St@tServ - About Data Mining", - "St@tServ Data Mining page ... Data mining in molecular biology, by Alvis Brazma. Graham Williams page. Knowledge Discovery and Data Mining Resources, ..."}, - - - {"http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm", - "MIT OpenCourseWare | Sloan School of Management | 15.062 Data Mining ...", - "Introduces students to a class of methods known as data mining that assists managers in recognizing patterns and making intelligent use of massive amounts of ..."}, - - - {"http://www.pentaho.com/products/data_mining/", - "Pentaho Commercial Open Source Business Intelligence: Data Mining", - "For example, data mining can warn you there's a high probability a specific ... Pentaho Data Mining is differentiated by its open, standards-compliant nature, ..."}, - - - {"http://www.investorhome.com/mining.htm", - "Investor Home - Data Mining", - "Data Mining or Data Snooping is the practice of searching for relationships and ... Data mining involves searching through databases for correlations and patterns ..."}, - - - {"http://www.datamining.com/", - "Predictive Modeling and Predictive Analytics Solutions | Enterprise ...", - "Insightful Enterprise Miner - Enterprise data mining for predictive modeling and predictive analytics."}, - - - {"http://www.sourcewatch.org/index.php?title=Data_mining", - "Data mining - SourceWatch", - "These agencies reported 199 data mining projects, of which 68 ... Office, \"DATA MINING. ... powerful technology known as data mining -- and how, in the ..."}, - - - {"http://www.autonlab.org/tutorials/", - "Statistical Data Mining Tutorials", - "Includes a set of tutorials on many aspects of statistical data mining, including the foundations of probability, the foundations of statistical data analysis, and most of the classic machine learning and data mining algorithms."}, - - - {"http://www.microstrategy.com/data-mining/index.asp", - "Data Mining", - "With MicroStrategy, data mining scoring is fully integrated into mainstream ... The integration of data mining models from other applications is accomplished by ..."}, - - - {"http://www.datamininglab.com/", - "Elder Research", - "Provides consulting and short courses in data mining and pattern discovery patterns in data."}, - - - {"http://www.sqlserverdatamining.com/", - "SQL Server Data Mining > Home", - "SQL Server Data Mining Portal ... Data Mining as an Application Platform (Whitepaper) Creating a Web Cross-sell Application with SQL Server 2005 Data Mining (Article) ..."}, - - - {"http://databases.about.com/cs/datamining/g/dmining.htm", - "Data Mining", - "What is data mining? Find out here! ... Book Review: Data Mining and Statistical Analysis Using SQL. What is Data Mining, and What Does it Have to Do with ..."}, - - - {"http://www.sas.com/technologies/analytics/datamining/index.html", - "Data Mining Software and Text Mining | SAS", - "... raw data to smarter ... Data Mining is an iterative process of creating ... 
The knowledge gleaned from data and text mining can be used to fuel ..."} - }; -} diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java --- solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,169 +0,0 @@ -package org.apache.solr.handler.clustering.carrot2; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Sort; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.handler.clustering.AbstractClusteringTest; -import org.apache.solr.handler.clustering.ClusteringComponent; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.search.DocList; -import org.apache.solr.search.SolrIndexSearcher; -import org.apache.solr.util.RefCounted; -import org.carrot2.util.attribute.AttributeUtils; - -import java.io.IOException; -import java.util.List; - -/** - * - */ -@SuppressWarnings("unchecked") -public class CarrotClusteringEngineTest extends AbstractClusteringTest { - public void testCarrotLingo() throws Exception { - checkEngine(getClusteringEngine("default"), 10); - } - - public void testCarrotStc() throws Exception { - checkEngine(getClusteringEngine("stc"), 1); - } - - public void testWithoutSubclusters() throws Exception { - checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs), - 1, 1, 0); - } - - public void testWithSubclusters() throws Exception { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true); - checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs, - params), 1, 1, 2); - } - - public void testNumDescriptions() throws Exception { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 5); - params.set(CarrotParams.NUM_DESCRIPTIONS, 3); - checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs, - params), 1, 3, 0); - } - - public void testCarrotAttributePassing() throws Exception { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "depth"), 
1);
-    params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 3);
-    checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
-        params), 1, 3, 0);
-  }
-
-  private CarrotClusteringEngine getClusteringEngine(String engineName) {
-    ClusteringComponent comp = (ClusteringComponent) h.getCore()
-        .getSearchComponent("clustering");
-    assertNotNull("clustering component should not be null", comp);
-    CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
-        .getSearchClusteringEngines().get(engineName);
-    assertNotNull("clustering engine for name: " + engineName
-        + " should not be null", engine);
-    return engine;
-  }
-
-  private List checkEngine(CarrotClusteringEngine engine,
-      int expectedNumClusters) throws IOException {
-    return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
-  }
-
-  private List checkEngine(CarrotClusteringEngine engine,
-      int expectedNumClusters, SolrParams clusteringParams) throws IOException {
-    // Get all documents to cluster
-    RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
-    MatchAllDocsQuery query = new MatchAllDocsQuery();
-    DocList docList;
-    try {
-      SolrIndexSearcher searcher = ref.get();
-      docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
-          numberOfDocs);
-      assertEquals("docList size", this.numberOfDocs, docList.matches());
-    } finally {
-      ref.decref();
-    }
-
-    ModifiableSolrParams solrParams = new ModifiableSolrParams();
-    solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
-    solrParams.add(clusteringParams);
-
-    // Perform clustering
-    LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
-    List results = (List) engine.cluster(query, docList, req);
-    req.close();
-    assertEquals("number of clusters", expectedNumClusters, results.size());
-    checkClusters(results, false);
-    return results;
-  }
-
-  private void checkClusters(List results, int expectedDocCount,
-      int expectedLabelCount, int expectedSubclusterCount) {
-    for (int i = 0; i < results.size(); i++) {
-      NamedList cluster = (NamedList) results.get(i);
-      checkCluster(cluster, expectedDocCount, expectedLabelCount,
-          expectedSubclusterCount);
-    }
-  }
-
-  private void checkClusters(List results, boolean hasSubclusters) {
-    for (int i = 0; i < results.size(); i++) {
-      checkCluster((NamedList) results.get(i), hasSubclusters);
-    }
-  }
-
-  private void checkCluster(NamedList cluster, boolean hasSubclusters) {
-    List docs = (List) cluster.get("docs");
-    assertNotNull("docs is null and it shouldn't be", docs);
-    for (int j = 0; j < docs.size(); j++) {
-      String id = (String) docs.get(j);
-      assertNotNull("id is null and it shouldn't be", id);
-    }
-
-    List labels = (List) cluster.get("labels");
-    assertNotNull("labels is null but it shouldn't be", labels);
-
-    if (hasSubclusters) {
-      List subclusters = (List) cluster.get("clusters");
-      assertNotNull("subclusters is null but it shouldn't be", subclusters);
-    }
-  }
-
-  private void checkCluster(NamedList cluster, int expectedDocCount,
-      int expectedLabelCount, int expectedSubclusterCount) {
-    checkCluster(cluster, expectedSubclusterCount > 0);
-    assertEquals("number of docs in cluster", expectedDocCount,
-        ((List) cluster.get("docs")).size());
-    assertEquals("number of labels in cluster", expectedLabelCount,
-        ((List) cluster.get("labels")).size());
-
-    if (expectedSubclusterCount > 0) {
-      List subclusters = (List) cluster.get("clusters");
-      assertEquals("number of subclusters in cluster",
-          expectedSubclusterCount, subclusters.size());
-    }
-  }
-}
diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
--- solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java 2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java 1970-01-01 00:00:00.000000000 +0000
@@ -1,83 +0,0 @@
-package org.apache.solr.handler.clustering.carrot2;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import com.google.common.collect.Lists;
-import org.carrot2.core.*;
-import org.carrot2.core.attribute.AttributeNames;
-import org.carrot2.core.attribute.Processing;
-import org.carrot2.util.attribute.*;
-import org.carrot2.util.attribute.constraint.IntRange;
-
-import java.util.List;
-
-@Bindable(prefix = "MockClusteringAlgorithm")
-public class MockClusteringAlgorithm extends ProcessingComponentBase implements
-    IClusteringAlgorithm {
-  @Input
-  @Processing
-  @Attribute(key = AttributeNames.DOCUMENTS)
-  private List<Document> documents;
-
-  @Output
-  @Processing
-  @Attribute(key = AttributeNames.CLUSTERS)
-  private List<Cluster> clusters;
-
-  @Input
-  @Processing
-  @Attribute
-  @IntRange(min = 1, max = 5)
-  private int depth = 2;
-
-  @Input
-  @Processing
-  @Attribute
-  @IntRange(min = 1, max = 5)
-  private int labels = 1;
-
-  @Override
-  public void process() throws ProcessingException {
-    clusters = Lists.newArrayList();
-    if (documents == null) {
-      return;
-    }
-
-    int documentIndex = 1;
-    for (Document document : documents) {
-      StringBuilder label = new StringBuilder("Cluster " + documentIndex);
-      Cluster cluster = createCluster(label.toString(), document);
-      clusters.add(cluster);
-      for (int i = 1; i <= depth; i++) {
-        label.append(".");
-        label.append(i);
-        Cluster newCluster = createCluster(label.toString(), document);
-        cluster.addSubclusters(createCluster(label.toString(), document), newCluster);
-        cluster = newCluster;
-      }
-      documentIndex++;
-    }
-  }
-
-  private Cluster createCluster(String labelBase, Document...
documents) { - Cluster cluster = new Cluster(); - for (int i = 0; i < labels; i++) { - cluster.addPhrases(labelBase + "#" + (i + 1)); - } - cluster.addDocuments(documents); - return cluster; - } -} diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java --- solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,80 +0,0 @@ -package org.apache.solr.handler.clustering; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.SimpleOrderedMap; -import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.component.QueryComponent; -import org.apache.solr.handler.component.SearchComponent; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.request.SolrQueryResponse; -import org.apache.solr.request.SolrRequestHandler; - - -/** - * - * - **/ -public class ClusteringComponentTest extends AbstractClusteringTest { - - public void testComponent() throws Exception { - SolrCore core = h.getCore(); - - SearchComponent sc = core.getSearchComponent("clustering"); - assertTrue("sc is null and it shouldn't be", sc != null); - ModifiableSolrParams params = new ModifiableSolrParams(); - - params.add(ClusteringComponent.COMPONENT_NAME, "true"); - params.add(CommonParams.Q, "*:*"); - - params.add(ClusteringParams.USE_SEARCH_RESULTS, "true"); - - - SolrRequestHandler handler = core.getRequestHandler("standard"); - SolrQueryResponse rsp; - rsp = new SolrQueryResponse(); - rsp.add("responseHeader", new SimpleOrderedMap()); - handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); - NamedList values = rsp.getValues(); - Object clusters = values.get("clusters"); - //System.out.println("Clusters: " + clusters); - assertTrue("clusters is null and it shouldn't be", clusters != null); - - - params = new ModifiableSolrParams(); - params.add(ClusteringComponent.COMPONENT_NAME, "true"); - params.add(ClusteringParams.ENGINE_NAME, "mock"); - params.add(ClusteringParams.USE_COLLECTION, "true"); - params.add(QueryComponent.COMPONENT_NAME, "false"); - - handler = core.getRequestHandler("docClustering"); - - rsp = new SolrQueryResponse(); - rsp.add("responseHeader", new SimpleOrderedMap()); - handler.handleRequest(new 
LocalSolrQueryRequest(core, params), rsp); - values = rsp.getValues(); - clusters = values.get("clusters"); - //System.out.println("Clusters: " + clusters); - assertTrue("clusters is null and it shouldn't be", clusters != null); - - - } - -} diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java --- solr-1.4.0+ds1/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -package org.apache.solr.handler.clustering; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.search.DocSet; - - -/** - * - * - **/ -public class MockDocumentClusteringEngine extends DocumentClusteringEngine { - public NamedList cluster(DocSet docs, SolrParams solrParams) { - NamedList result = new NamedList(); - return result; - } - - public NamedList cluster(SolrParams solrParams) { - NamedList result = new NamedList(); - return result; - } -} diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/mapping-ISOLatin1Accent.txt solr-3.3/contrib/clustering/src/test/resources/solr/conf/mapping-ISOLatin1Accent.txt --- solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/mapping-ISOLatin1Accent.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/resources/solr/conf/mapping-ISOLatin1Accent.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,246 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) 
-
-# example:
-# "À" => "A"
-# "\u00C0" => "A"
-# "\u00C0" => "\u0041"
-# "ß" => "ss"
-# "\t" => " "
-# "\n" => ""
-
-# À => A
-"\u00C0" => "A"
-
-# Á => A
-"\u00C1" => "A"
-
-# Â => A
-"\u00C2" => "A"
-
-# Ã => A
-"\u00C3" => "A"
-
-# Ä => A
-"\u00C4" => "A"
-
-# Å => A
-"\u00C5" => "A"
-
-# Æ => AE
-"\u00C6" => "AE"
-
-# Ç => C
-"\u00C7" => "C"
-
-# È => E
-"\u00C8" => "E"
-
-# É => E
-"\u00C9" => "E"
-
-# Ê => E
-"\u00CA" => "E"
-
-# Ë => E
-"\u00CB" => "E"
-
-# Ì => I
-"\u00CC" => "I"
-
-# Í => I
-"\u00CD" => "I"
-
-# Î => I
-"\u00CE" => "I"
-
-# Ï => I
-"\u00CF" => "I"
-
-# Ĳ => IJ
-"\u0132" => "IJ"
-
-# Ð => D
-"\u00D0" => "D"
-
-# Ñ => N
-"\u00D1" => "N"
-
-# Ò => O
-"\u00D2" => "O"
-
-# Ó => O
-"\u00D3" => "O"
-
-# Ô => O
-"\u00D4" => "O"
-
-# Õ => O
-"\u00D5" => "O"
-
-# Ö => O
-"\u00D6" => "O"
-
-# Ø => O
-"\u00D8" => "O"
-
-# Œ => OE
-"\u0152" => "OE"
-
-# Þ
-"\u00DE" => "TH"
-
-# Ù => U
-"\u00D9" => "U"
-
-# Ú => U
-"\u00DA" => "U"
-
-# Û => U
-"\u00DB" => "U"
-
-# Ü => U
-"\u00DC" => "U"
-
-# Ý => Y
-"\u00DD" => "Y"
-
-# Ÿ => Y
-"\u0178" => "Y"
-
-# à => a
-"\u00E0" => "a"
-
-# á => a
-"\u00E1" => "a"
-
-# â => a
-"\u00E2" => "a"
-
-# ã => a
-"\u00E3" => "a"
-
-# ä => a
-"\u00E4" => "a"
-
-# å => a
-"\u00E5" => "a"
-
-# æ => ae
-"\u00E6" => "ae"
-
-# ç => c
-"\u00E7" => "c"
-
-# è => e
-"\u00E8" => "e"
-
-# é => e
-"\u00E9" => "e"
-
-# ê => e
-"\u00EA" => "e"
-
-# ë => e
-"\u00EB" => "e"
-
-# ì => i
-"\u00EC" => "i"
-
-# í => i
-"\u00ED" => "i"
-
-# î => i
-"\u00EE" => "i"
-
-# ï => i
-"\u00EF" => "i"
-
-# ĳ => ij
-"\u0133" => "ij"
-
-# ð => d
-"\u00F0" => "d"
-
-# ñ => n
-"\u00F1" => "n"
-
-# ò => o
-"\u00F2" => "o"
-
-# ó => o
-"\u00F3" => "o"
-
-# ô => o
-"\u00F4" => "o"
-
-# õ => o
-"\u00F5" => "o"
-
-# ö => o
-"\u00F6" => "o"
-
-# ø => o
-"\u00F8" => "o"
-
-# œ => oe
-"\u0153" => "oe"
-
-# ß => ss
-"\u00DF" => "ss"
-
-# þ => th
-"\u00FE" => "th"
-
-# ù => u
-"\u00F9" => "u"
-
-# ú => u
-"\u00FA" => "u"
-
-# û => u
-"\u00FB" => "u"
-
-# ü => u
-"\u00FC" => "u"
-
-# ý => y
-"\u00FD" => "y"
-
-# ÿ => y
-"\u00FF" => "y"
-
-# ﬀ => ff
-"\uFB00" => "ff"
-
-# ﬁ => fi
-"\uFB01" => "fi"
-
-# ﬂ => fl
-"\uFB02" => "fl"
-
-# ﬃ => ffi
-"\uFB03" => "ffi"
-
-# ﬄ => ffl
-"\uFB04" => "ffl"
-
-# ﬅ => ft
-"\uFB05" => "ft"
-
-# ﬆ => st
-"\uFB06" => "st"
diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/protwords.txt solr-3.3/contrib/clustering/src/test/resources/solr/conf/protwords.txt
--- solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/protwords.txt 2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/clustering/src/test/resources/solr/conf/protwords.txt 1970-01-01 00:00:00.000000000 +0000
@@ -1,21 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
- -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. -dontstems -zwhacky - diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/schema.xml solr-3.3/contrib/clustering/src/test/resources/solr/conf/schema.xml --- solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/schema.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/resources/solr/conf/schema.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,347 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - text - - - - - - - - - - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/solrconfig.xml solr-3.3/contrib/clustering/src/test/resources/solr/conf/solrconfig.xml --- solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/solrconfig.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/resources/solr/conf/solrconfig.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,561 +0,0 @@ - - - - - - ${solr.abortOnConfigurationError:true} - - - ${solr.data.dir:./solr/data} - - - - - false - - 10 - - - - 32 - 2147483647 - 10000 - 1000 - 10000 - - - - - - - - - - - single - - - - - false - 32 - 10 - - - 2147483647 - 10000 - - - false - - - - - - - - - - - - - - - - - - - - - - 1024 - - - - - - - - - - - - - true - - - - - - - - 50 - - - 200 - - - - - - - - - solr 0 10 - rocks 0 10 - static newSearcher warming query from solrconfig.xml - - - - - - - fast_warm 0 10 - static firstSearcher warming query from solrconfig.xml - - - - - false - - - 2 - - - - - - - - - - - - - - - - - - - - - - - explicit - - - - clustering - - - - - - - - explicit - - - - doc-clustering - - - - - - - - - - - default - org.carrot2.clustering.lingo.LingoClusteringAlgorithm - - - stc - org.carrot2.clustering.stc.STCClusteringAlgorithm - - - mock - org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm - - - - - - - - mock - org.apache.solr.handler.clustering.MockDocumentClusteringEngine - - - - - - - - - - - - - - - - - - - - - - - - standard - solrpingquery - all - - - - - - - explicit - true - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - 5 - - - - - - - - - - solr - - - - - diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/spellings.txt solr-3.3/contrib/clustering/src/test/resources/solr/conf/spellings.txt --- solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/spellings.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/resources/solr/conf/spellings.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -pizza -history \ No newline at end of file diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/stopwords.txt solr-3.3/contrib/clustering/src/test/resources/solr/conf/stopwords.txt --- solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/stopwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/resources/solr/conf/stopwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -#Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -s -such -t -that -the -their -then -there -these -they -this -to -was -will -with - diff -Nru solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/synonyms.txt solr-3.3/contrib/clustering/src/test/resources/solr/conf/synonyms.txt --- solr-1.4.0+ds1/contrib/clustering/src/test/resources/solr/conf/synonyms.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/clustering/src/test/resources/solr/conf/synonyms.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaa => aaaa -bbb => bbbb1 bbbb2 -ccc => cccc1,cccc2 -a\=>a => b\=>b -a\,a => b\,b -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/build.xml solr-3.3/contrib/dataimporthandler/build.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/build.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/build.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,210 +0,0 @@ - - - - - - - - - - - - - Data Import Handler - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Tests failed! - - - - - - - - - - - - - - - - - - Tests failed! 
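(Editor's aside on the synonyms.txt rules shown above: "=>" lines are one-way replacements, while comma-separated lines are equivalence groups in which each member expands to the whole group. The toy program below is an invented illustration of those two rule shapes only, not Solr's SynonymFilter implementation.)

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class SynonymRulesSketch {
  public static void main(String[] args) {
    Map<String, List<String>> rules = new HashMap<String, List<String>>();
    // Explicit mapping: "ccc => cccc1,cccc2" replaces the input token.
    rules.put("ccc", Arrays.asList("cccc1", "cccc2"));
    // Equivalence group: "GB,gib,gigabyte" expands each member to the whole group.
    List<String> group = Arrays.asList("GB", "gib", "gigabyte");
    for (String member : group) {
      rules.put(member, group);
    }
    for (String token : "ccc gib pizza".split(" ")) {
      List<String> expansion = rules.containsKey(token)
          ? rules.get(token) : Collections.singletonList(token);
      System.out.println(token + " -> " + expansion);
    }
  }
}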
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/CHANGES.txt solr-3.3/contrib/dataimporthandler/CHANGES.txt --- solr-1.4.0+ds1/contrib/dataimporthandler/CHANGES.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/CHANGES.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,361 +0,0 @@ - Apache Solr - DataImportHandler - Release Notes - -Introduction ------------- -DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and -HTTP data sources quick and easy. - - -$Id: CHANGES.txt 829936 2009-10-26 20:23:00Z gsingers $ - -================== Release 1.4.0 ================== - -Upgrading from Solr 1.3 ------------------------ - -The Evaluator API has been changed in a non back-compatible way. Users who have developed custom Evaluators will need -to change their code according to the new API for it to work. See SOLR-996 for details. - -The formatDate evaluator's syntax has been changed. The new syntax is formatDate(<variable>, '<format string>'). -For example, formatDate(x.date, 'yyyy-MM-dd'). In the old syntax, the date format string was written without single-quotes. -The old syntax has been deprecated and will be removed in 1.5; until then, using the old syntax will log a warning. - -The Context API has been changed in a non back-compatible way. In particular, the Context.currentProcess() method -now returns a String describing the type of the current import process instead of an int. Similarly, the public -constants in Context viz. FULL_DUMP, DELTA_DUMP and FIND_DELTA are changed to a String type. See SOLR-969 for details. - -The EntityProcessor API has been simplified by moving logic for applying transformers and handling multi-row outputs -from Transformers into an EntityProcessorWrapper class. EntityProcessor#destroy is now called once per -parent-row at the end of row (end of data). A new method EntityProcessor#close is added which is called at the end -of import. - -In Solr 1.3, if the last_index_time was not available (first import) and a delta-import was requested, a full-import -was run instead. This is no longer the case. In Solr 1.4 delta import is run with last_index_time as the epoch -date (January 1, 1970, 00:00:00 GMT) if last_index_time is not available. - -Detailed Change List ---------------------- - -New Features ----------------------- -1. SOLR-768: Set last_index_time variable in full-import command. - (Wojtek Piaseczny, Noble Paul via shalin) - -2. SOLR-811: Allow a "deltaImportQuery" attribute in SqlEntityProcessor which is used for delta imports - instead of DataImportHandler manipulating the SQL itself. - (Noble Paul via shalin) - -3. SOLR-842: Better error handling in DataImportHandler with options to abort, skip and continue imports. - (Noble Paul, shalin) - -4. SOLR-833: A DataSource to read data from a field as a reader. This can be used, for example, to read XMLs - residing as CLOBs or BLOBs in databases. - (Noble Paul via shalin) - -5. SOLR-887: A Transformer to strip HTML tags. - (Ahmed Hammad via shalin) - -6. SOLR-886: DataImportHandler should rollback when an import fails or it is aborted - (shalin) - -7. SOLR-891: A Transformer to read strings from Clob type. - (Noble Paul via shalin) - -8. SOLR-812: Configurable JDBC settings in JdbcDataSource including optimized defaults for read only mode. - (David Smiley, Glen Newton, shalin) - -9. 
SOLR-910: Add a few utility commands to the DIH admin page such as full import, delta import, status, reload config. - (Ahmed Hammad via shalin) - -10.SOLR-938: Add event listener API for import start and end. - (Kay Kay, Noble Paul via shalin) - -11.SOLR-801: Add support for configurable pre-import and post-import delete query per root-entity. - (Noble Paul via shalin) - -12.SOLR-988: Add a new scope for session data stored in Context to store objects across imports. - (Noble Paul via shalin) - -13.SOLR-980: A PlainTextEntityProcessor which can read from any DataSource and output a String. - (Nathan Adams, Noble Paul via shalin) - -14.SOLR-1003: XPathEntityprocessor must allow slurping all text from a given xml node and its children. - (Noble Paul via shalin) - -15.SOLR-1001: Allow variables in various attributes of RegexTransformer, HTMLStripTransformer - and NumberFormatTransformer. - (Fergus McMenemie, Noble Paul, shalin) - -16.SOLR-989: Expose running statistics from the Context API. - (Noble Paul, shalin) - -17.SOLR-996: Expose Context to Evaluators. - (Noble Paul, shalin) - -18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity. - (Jon Baer, Noble Paul via shalin) - -19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field - of TemplateTransformer in other transformers, among other things. - (Fergus McMenemie, Noble Paul via shalin) - -20.SOLR-1066: New methods in Context to expose Script details. ScriptTransformer changed to read scripts - through the new API methods. - (Noble Paul via shalin) - -21.SOLR-1062: A LogTransformer which can log data in a given template format. - (Jon Baer, Noble Paul via shalin) - -22.SOLR-1065: A ContentStreamDataSource which can accept HTTP POST data in a content stream. This can be used to - push data to Solr instead of just pulling it from DB/Files/URLs. - (Noble Paul via shalin) - -23.SOLR-1061: Improve RegexTransformer to create multiple columns from regex groups. - (Noble Paul via shalin) - -24.SOLR-1059: Special flags introduced for deleting documents by query or id, skipping rows and stopping further - transforms. Use $deleteDocById, $deleteDocByQuery for deleting by id and query respectively. - Use $skipRow to skip the current row but continue with the document. Use $stopTransform to stop - further transformers. New methods are introduced in Context for deleting by id and query. - (Noble Paul, Fergus McMenemie, shalin) - -25.SOLR-1076: JdbcDataSource should resolve variables in all its configuration parameters. - (shalin) - -26.SOLR-1055: Make DIH JdbcDataSource easily extensible by making the createConnectionFactory method protected and - return a Callable object. - (Noble Paul, shalin) - -27.SOLR-1058: JdbcDataSource can lookup javax.sql.DataSource using JNDI. Use a jndiName attribute to specify the - location of the data source. - (Jason Shepherd, Noble Paul via shalin) - -28.SOLR-1083: An Evaluator for escaping query characters. - (Noble Paul, shalin) - -29.SOLR-934: A MailEntityProcessor to enable indexing mails from POP/IMAP sources into a solr index. - (Preetam Rao, shalin) - -30.SOLR-1060: A LineEntityProcessor which can stream lines of text from a given file to be indexed directly or - for processing with transformers and child entities. - (Fergus McMenemie, Noble Paul, shalin) - -31.SOLR-1127: Add support for field name to be templatized. 
- (Noble Paul, shalin) - -32.SOLR-1092: Added a new command named 'import' which does not automatically clean the index. This is useful and - more appropriate when one needs to import only some of the entities. - (Noble Paul via shalin) - -33.SOLR-1153: 'deltaImportQuery' is honored on child entities as well (noble) - -34.SOLR-1230: Enhanced dataimport.jsp to work with all DataImportHandler request handler configurations, - rather than just a hardcoded /dataimport handler. (ehatcher) - -35.SOLR-1235: disallow period (.) in entity names (noble) - -36.SOLR-1234: Multiple DIH does not work because all of them write to dataimport.properties. - Use the handler name as the properties file name (noble) - -37.SOLR-1348: Support binary field type in convertType logic in JdbcDataSource (shalin) - -38.SOLR-1406: Make FileDataSource and FileListEntityProcessor to be more extensible (Luke Forehand, shalin) - -39.SOLR-1437 : XPathEntityProcessor can deal with xpath syntaxes such as //tagname , /root//tagname (Fergus McMenemie via noble) - -Optimizations ----------------------- -1. SOLR-846: Reduce memory consumption during delta import by removing keys when used - (Ricky Leung, Noble Paul via shalin) - -2. SOLR-974: DataImportHandler skips commit if no data has been updated. - (Wojtek Piaseczny, shalin) - -3. SOLR-1004: Check for abort more frequently during delta-imports. - (Marc Sturlese, shalin) - -4. SOLR-1098: DateFormatTransformer can cache the format objects. - (Noble Paul via shalin) - -5. SOLR-1465: Replaced string concatenations with StringBuilder append calls in XPathRecordReader. - (Mark Miller, shalin) - - -Bug Fixes ----------------------- -1. SOLR-800: Deep copy collections to avoid ConcurrentModificationException in XPathEntityprocessor while streaming - (Kyle Morrison, Noble Paul via shalin) - -2. SOLR-823: Request parameter variables ${dataimporter.request.xxx} are not resolved - (Mck SembWever, Noble Paul, shalin) - -3. SOLR-728: Add synchronization to avoid race condition of multiple imports working concurrently - (Walter Ferrara, shalin) - -4. SOLR-742: Add ability to create dynamic fields with custom DataImportHandler transformers - (Wojtek Piaseczny, Noble Paul, shalin) - -5. SOLR-832: Rows parameter is not honored in non-debug mode and can abort a running import in debug mode. - (Akshay Ukey, shalin) - -6. SOLR-838: The VariableResolver obtained from a DataSource's context does not have current data. - (Noble Paul via shalin) - -7. SOLR-864: DataImportHandler does not catch and log Errors (shalin) - -8. SOLR-873: Fix case-sensitive field names and columns (Jon Baer, shalin) - -9. SOLR-893: Unable to delete documents via SQL and deletedPkQuery with deltaimport - (Dan Rosher via shalin) - -10. SOLR-888: DateFormatTransformer cannot convert non-string type - (Amit Nithian via shalin) - -11. SOLR-841: DataImportHandler should throw exception if a field does not have column attribute - (Michael Henson, shalin) - -12. SOLR-884: CachedSqlEntityProcessor should check if the cache key is present in the query results - (Noble Paul via shalin) - -13. SOLR-985: Fix thread-safety issue with TemplateString for concurrent imports with multiple cores. - (Ryuuichi Kumai via shalin) - -14. SOLR-999: XPathRecordReader fails on XMLs with nodes mixed with CDATA content. - (Fergus McMenemie, Noble Paul via shalin) - -15.SOLR-1000: FileListEntityProcessor should not apply fileName filter to directory names. 
- (Fergus McMenemie via shalin) - -16.SOLR-1009: Repeated column names result in duplicate values. - (Fergus McMenemie, Noble Paul via shalin) - -17.SOLR-1017: Fix thread-safety issue with last_index_time for concurrent imports in multiple cores due to unsafe usage - of SimpleDateFormat by multiple threads. - (Ryuuichi Kumai via shalin) - -18.SOLR-1024: Calling abort on DataImportHandler import commits data instead of calling rollback. - (shalin) - -19.SOLR-1037: DIH should not add null values in a row returned by EntityProcessor to documents. - (shalin) - -20.SOLR-1040: XPathEntityProcessor fails with an xpath like /feed/entry/link[@type='text/html']/@href - (Noble Paul via shalin) - -21.SOLR-1042: Fix memory leak in DIH by making TemplateString non-static member in VariableResolverImpl - (Ryuuichi Kumai via shalin) - -22.SOLR-1053: IndexOutOfBoundsException in SolrWriter.getResourceAsString when size of data-config.xml is a - multiple of 1024 bytes. - (Herb Jiang via shalin) - -23.SOLR-1077: IndexOutOfBoundsException with useSolrAddSchema in XPathEntityProcessor. - (Sam Keen, Noble Paul via shalin) - -24.SOLR-1080: RegexTransformer should not replace if regex is not matched. - (Noble Paul, Fergus McMenemie via shalin) - -25.SOLR-1090: DataImportHandler should load the data-config.xml using UTF-8 encoding. - (Rui Pereira, shalin) - -26.SOLR-1146: ConcurrentModificationException in DataImporter.getStatusMessages - (Walter Ferrara, Noble Paul via shalin) - -27.SOLR-1229: Fixes for deletedPkQuery, particularly when using transformed Solr unique id's - (Lance Norskog, Noble Paul via ehatcher) - -28.SOLR-1286: Fix the commit parameter always defaulting to "true" even if "false" is explicitly passed in. - (Jay Hill, Noble Paul via ehatcher) - -29.SOLR-1323: Reset XPathEntityProcessor's $hasMore/$nextUrl when fetching next URL (noble, ehatcher) - -30.SOLR-1450: Jdbc connection properties such as batchSize are not applied if the driver jar is placed - in solr_home/lib. - (Steve Sun via shalin) - -31.SOLR-1474: Delta-import should run even if last_index_time is not set. - (shalin) - - -Documentation ----------------------- -1. SOLR-1369: Add HSQLDB Jar to example-DIH, unzip database and update instructions. - -Other ----------------------- -1. SOLR-782: Refactored SolrWriter to make it a concrete class and removed wrappers over SolrInputDocument. - Refactored to load Evaluators lazily. Removed multiple document nodes in the configuration xml. - Removed support for 'default' variables, they are automatically available as request parameters. - (Noble Paul via shalin) - -2. SOLR-964: XPathEntityProcessor now ignores DTD validations - (Fergus McMenemie, Noble Paul via shalin) - -3. SOLR-1029: Standardize Evaluator parameter parsing and added helper functions for parsing all evaluator - parameters in a standard way. - (Noble Paul, shalin) - -4. SOLR-1081: Change EventListener to be an interface so that components such as an EntityProcessor or a Transformer - can act as an event listener. - (Noble Paul, shalin) - -5. SOLR-1027: Alias the 'dataimporter' namespace to a shorter name 'dih'. - (Noble Paul via shalin) - -6. SOLR-1084: Better error reporting when entity name is a reserved word and data-config.xml root node - is not <dataConfig>. - (Noble Paul via shalin) - -7. SOLR-1087: Deprecate 'where' attribute in CachedSqlEntityProcessor in favor of cacheKey and cacheLookup. - (Noble Paul via shalin) - -8. SOLR-969: Change the FULL_DUMP, DELTA_DUMP, FIND_DELTA constants in Context to String.
- Change Context.currentProcess() to return a string instead of an integer. - (Kay Kay, Noble Paul, shalin) - -9. SOLR-1120: Simplified EntityProcessor API by moving logic for applying transformers and handling multi-row outputs - from Transformers into an EntityProcessorWrapper class. The behavior of the method - EntityProcessor#destroy has been modified to be called once per parent-row at the end of row. A new - method EntityProcessor#close is added which is called at the end of import. A new method - Context#getResolvedEntityAttribute is added which returns the resolved value of an entity's attribute. - Introduced a DocWrapper which takes care of maintaining document level session variables. - (Noble Paul, shalin) - -10.SOLR-1265: Add variable resolving for URLDataSource properties like baseUrl. (Chris Eldredge via ehatcher) - -11.SOLR-1269: Better error messages from JdbcDataSource when JDBC Driver name or SQL is incorrect. - (ehatcher, shalin) - -================== Release 1.3.0 20080915 ================== - -Status ------- -This is the first release since DataImportHandler was added to the contrib solr distribution. -The following changes list changes since the code was introduced, not since -the first official release. - - -Detailed Change List --------------------- - -New Features -1. SOLR-700: Allow configurable locales through a locale attribute in fields for NumberFormatTransformer. - (Stefan Oestreicher, shalin) - -Changes in runtime behavior - -Bug Fixes -1. SOLR-704: NumberFormatTransformer can silently ignore part of the string while parsing. Now it tries to - use the complete string for parsing. Failure to do so will result in an exception. - (Stefan Oestreicher via shalin) - -2. SOLR-729: Context.getDataSource(String) gives current entity's DataSource instance regardless of argument. - (Noble Paul, shalin) - -3. SOLR-726: Jdbc Drivers and DataSources fail to load if placed in multicore sharedLib or core's lib directory. 
- (Walter Ferrara, Noble Paul, shalin) - -Other Changes - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template solr-3.3/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template --- solr-1.4.0+ds1/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/solr-dataimporthandler-extras-pom.xml.template 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ - - - - - 4.0.0 - - - org.apache.solr - solr-parent - @maven_version@ - - - org.apache.solr - solr-dataimporthandler-extras - Apache Solr DataImportHandler Extras - @maven_version@ - Apache Solr DataImportHandler Extras - jar - - - - javax.activation - activation - 1.1 - - - javax.mail - mail - 1.4.1 - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template solr-3.3/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template --- solr-1.4.0+ds1/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/solr-dataimporthandler-pom.xml.template 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ - - - - - 4.0.0 - - - org.apache.solr - solr-parent - @maven_version@ - - - org.apache.solr - solr-dataimporthandler - Apache Solr DataImportHandler - @maven_version@ - Apache Solr DataImportHandler - jar - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,599 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import com.sun.mail.imap.IMAPMessage; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.utils.ParseUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.mail.*; -import javax.mail.internet.AddressException; -import javax.mail.internet.ContentType; -import javax.mail.internet.InternetAddress; -import javax.mail.internet.MimeMessage; -import javax.mail.search.AndTerm; -import javax.mail.search.ComparisonTerm; -import javax.mail.search.ReceivedDateTerm; -import javax.mail.search.SearchTerm; -import java.io.InputStream; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.*; - -/** - * An EntityProcessor instance which can index emails along with their attachments from POP3 or IMAP sources. Refer to - * http://wiki.apache.org/solr/DataImportHandler for more - * details. This API is experimental and subject to change - * - * @version $Id: MailEntityProcessor.java 818419 2009-09-24 09:58:10Z shalin $ - * @since solr 1.4 - */ -public class MailEntityProcessor extends EntityProcessorBase { - - public static interface CustomFilter { - public SearchTerm getCustomSearch(Folder folder); - } - - public void init(Context context) { - super.init(context); - // set attributes using XXX getXXXFromContext(attribute, defualtValue); - // applies variable resolver and return default if value is not found or null - // REQUIRED : connection and folder info - user = getStringFromContext("user", null); - password = getStringFromContext("password", null); - host = getStringFromContext("host", null); - protocol = getStringFromContext("protocol", null); - folderNames = getStringFromContext("folders", null); - // validate - if (host == null || protocol == null || user == null || password == null - || folderNames == null) - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "'user|password|protocol|host|folders' are required attributes"); - - //OPTIONAL : have defaults and are optional - recurse = getBoolFromContext("recurse", true); - String excludes = getStringFromContext("exclude", ""); - if (excludes != null && !excludes.trim().equals("")) { - exclude = Arrays.asList(excludes.split(",")); - } - String includes = getStringFromContext("include", ""); - if (includes != null && !includes.trim().equals("")) { - include = Arrays.asList(includes.split(",")); - } - batchSize = getIntFromContext("batchSize", 20); - customFilter = getStringFromContext("customFilter", ""); - String s = getStringFromContext("fetchMailsSince", ""); - if (s != null) - try { - fetchMailsSince = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(s); - } catch (ParseException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid value for fetchMailSince: " + s, e); - } - - fetchSize = getIntFromContext("fetchSize", 32 * 1024); - cTimeout = getIntFromContext("connectTimeout", 30 * 1000); - rTimeout = getIntFromContext("readTimeout", 60 * 1000); - processAttachment = getBoolFromContext("processAttachement", true); - - logConfig(); - } - - public Map nextRow() { - Message mail; - Map row = null; - do { - // try till there is a valid document or folders get exhausted. 
- // when mail == NULL, it means end of processing - mail = getNextMail(); - if (mail != null) - row = getDocumentFromMail(mail); - } while (row == null && mail != null); - return row; - } - - private Message getNextMail() { - if (!connected) { - if (!connectToMailBox()) - return null; - connected = true; - } - if (folderIter == null) { - createFilters(); - folderIter = new FolderIterator(mailbox); - } - // get next message from the folder - // if folder is exhausted get next folder - // loop till a valid mail or all folders exhausted. - while (msgIter == null || !msgIter.hasNext()) { - Folder next = folderIter.hasNext() ? folderIter.next() : null; - if (next == null) { - return null; - } - msgIter = new MessageIterator(next, batchSize); - } - return msgIter.next(); - } - - private Map getDocumentFromMail(Message mail) { - Map row = new HashMap(); - try { - addPartToDocument(mail, row, true); - return row; - } catch (Exception e) { - return null; - } - } - - public void addPartToDocument(Part part, Map row, boolean outerMost) throws Exception { - if (part instanceof Message) { - addEnvelopToDocument(part, row); - } - - String ct = part.getContentType(); - ContentType ctype = new ContentType(ct); - if (part.isMimeType("multipart/*")) { - Multipart mp = (Multipart) part.getContent(); - int count = mp.getCount(); - if (part.isMimeType("multipart/alternative")) - count = 1; - for (int i = 0; i < count; i++) - addPartToDocument(mp.getBodyPart(i), row, false); - } else if (part.isMimeType("message/rfc822")) { - addPartToDocument((Part) part.getContent(), row, false); - } else { - String disp = part.getDisposition(); - if (!processAttachment || (disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT))) return; - InputStream is = part.getInputStream(); - String fileName = part.getFileName(); - String content = ParseUtils.getStringContent(is, TikaConfig.getDefaultConfig(), ctype.getBaseType().toLowerCase()); - if (disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT)) { - if (row.get(ATTACHMENT) == null) - row.put(ATTACHMENT, new ArrayList()); - List contents = (List) row.get(ATTACHMENT); - contents.add(content); - row.put(ATTACHMENT, contents); - if (row.get(ATTACHMENT_NAMES) == null) - row.put(ATTACHMENT_NAMES, new ArrayList()); - List names = (List) row.get(ATTACHMENT_NAMES); - names.add(fileName); - row.put(ATTACHMENT_NAMES, names); - } else { - if (row.get(CONTENT) == null) - row.put(CONTENT, new ArrayList()); - List contents = (List) row.get(CONTENT); - contents.add(content); - row.put(CONTENT, contents); - } - } - } - - private void addEnvelopToDocument(Part part, Map row) throws MessagingException { - MimeMessage mail = (MimeMessage) part; - Address[] adresses; - if ((adresses = mail.getFrom()) != null && adresses.length > 0) - row.put(FROM, adresses[0].toString()); - - List to = new ArrayList(); - if ((adresses = mail.getRecipients(Message.RecipientType.TO)) != null) - addAddressToList(adresses, to); - if ((adresses = mail.getRecipients(Message.RecipientType.CC)) != null) - addAddressToList(adresses, to); - if ((adresses = mail.getRecipients(Message.RecipientType.BCC)) != null) - addAddressToList(adresses, to); - if (to.size() > 0) - row.put(TO_CC_BCC, to); - - row.put(MESSAGE_ID, mail.getMessageID()); - row.put(SUBJECT, mail.getSubject()); - - Date d = mail.getSentDate(); - if (d != null) { - row.put(SENT_DATE, d); - } - - List flags = new ArrayList(); - for (Flags.Flag flag : mail.getFlags().getSystemFlags()) { - if (flag == Flags.Flag.ANSWERED) - flags.add(FLAG_ANSWERED); - else if 
(flag == Flags.Flag.DELETED) - flags.add(FLAG_DELETED); - else if (flag == Flags.Flag.DRAFT) - flags.add(FLAG_DRAFT); - else if (flag == Flags.Flag.FLAGGED) - flags.add(FLAG_FLAGGED); - else if (flag == Flags.Flag.RECENT) - flags.add(FLAG_RECENT); - else if (flag == Flags.Flag.SEEN) - flags.add(FLAG_SEEN); - } - flags.addAll(Arrays.asList(mail.getFlags().getUserFlags())); - row.put(FLAGS, flags); - - String[] hdrs = mail.getHeader("X-Mailer"); - if (hdrs != null) - row.put(XMAILER, hdrs[0]); - } - - - private void addAddressToList(Address[] adresses, List to) throws AddressException { - for (Address address : adresses) { - to.add(address.toString()); - InternetAddress ia = (InternetAddress) address; - if (ia.isGroup()) { - InternetAddress[] group = ia.getGroup(false); - for (InternetAddress member : group) - to.add(member.toString()); - } - } - } - - private boolean connectToMailBox() { - try { - Properties props = new Properties(); - props.setProperty("mail.store.protocol", protocol); - props.setProperty("mail.imap.fetchsize", "" + fetchSize); - props.setProperty("mail.imap.timeout", "" + rTimeout); - props.setProperty("mail.imap.connectiontimeout", "" + cTimeout); - Session session = Session.getDefaultInstance(props, null); - mailbox = session.getStore(protocol); - mailbox.connect(host, user, password); - LOG.info("Connected to mailbox"); - return true; - } catch (MessagingException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Connection failed", e); - } - } - - private void createFilters() { - if (fetchMailsSince != null) { - filters.add(new MailsSinceLastCheckFilter(fetchMailsSince)); - } - if (customFilter != null && !customFilter.equals("")) { - try { - Class cf = Class.forName(customFilter); - Object obj = cf.newInstance(); - if (obj instanceof CustomFilter) { - filters.add((CustomFilter) obj); - } - } catch (Exception e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Custom filter could not be created", e); - } - } - } - - private void logConfig() { - if (!LOG.isInfoEnabled()) return; - StringBuffer config = new StringBuffer(); - config.append("user : ").append(user).append(System.getProperty("line.separator")); - config.append("pwd : ").append(password).append(System.getProperty("line.separator")); - config.append("protocol : ").append(protocol).append(System.getProperty("line.separator")); - config.append("host : ").append(host).append(System.getProperty("line.separator")); - config.append("folders : ").append(folderNames).append(System.getProperty("line.separator")); - config.append("recurse : ").append(recurse).append(System.getProperty("line.separator")); - config.append("exclude : ").append(exclude.toString()).append(System.getProperty("line.separator")); - config.append("include : ").append(include.toString()).append(System.getProperty("line.separator")); - config.append("batchSize : ").append(batchSize).append(System.getProperty("line.separator")); - config.append("fetchSize : ").append(fetchSize).append(System.getProperty("line.separator")); - config.append("read timeout : ").append(rTimeout).append(System.getProperty("line.separator")); - config.append("conection timeout : ").append(cTimeout).append(System.getProperty("line.separator")); - config.append("custom filter : ").append(customFilter).append(System.getProperty("line.separator")); - config.append("fetch mail since : ").append(fetchMailsSince).append(System.getProperty("line.separator")); - LOG.info(config.toString()); - } - - class FolderIterator 
implements Iterator { - private Store mailbox; - private List topLevelFolders; - private List folders = null; - private Folder lastFolder = null; - - public FolderIterator(Store mailBox) { - this.mailbox = mailBox; - folders = new ArrayList(); - getTopLevelFolders(mailBox); - } - - public boolean hasNext() { - return !folders.isEmpty(); - } - - public Folder next() { - try { - boolean hasMessages = false; - Folder next; - do { - if (lastFolder != null) { - lastFolder.close(false); - lastFolder = null; - } - if (folders.isEmpty()) { - mailbox.close(); - return null; - } - next = folders.remove(0); - if (next != null) { - String fullName = next.getFullName(); - if (!excludeFolder(fullName)) { - hasMessages = (next.getType() & Folder.HOLDS_MESSAGES) != 0; - next.open(Folder.READ_ONLY); - lastFolder = next; - LOG.info("Opened folder : " + fullName); - } - if (recurse && ((next.getType() & Folder.HOLDS_FOLDERS) != 0)) { - Folder[] children = next.list(); - LOG.info("Added its children to list : "); - for (int i = children.length - 1; i >= 0; i--) { - folders.add(0, children[i]); - LOG.info("child name : " + children[i].getFullName()); - } - if (children.length == 0) - LOG.info("NO children : "); - } - } - } - while (!hasMessages); - return next; - } catch (MessagingException e) { - //throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - // "Folder open failed", e); - } - return null; - } - - public void remove() { - throw new UnsupportedOperationException("Its read only mode..."); - } - - private void getTopLevelFolders(Store mailBox) { - if (folderNames != null) - topLevelFolders = Arrays.asList(folderNames.split(",")); - for (int i = 0; topLevelFolders != null && i < topLevelFolders.size(); i++) { - try { - folders.add(mailbox.getFolder(topLevelFolders.get(i))); - } catch (MessagingException e) { - // skip bad ones unless its the last one and still no good folder - if (folders.size() == 0 && i == topLevelFolders.size() - 1) - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Folder retreival failed"); - } - } - if (topLevelFolders == null || topLevelFolders.size() == 0) { - try { - folders.add(mailBox.getDefaultFolder()); - } catch (MessagingException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Folder retreival failed"); - } - } - } - - private boolean excludeFolder(String name) { - for (String s : exclude) { - if (name.matches(s)) - return true; - } - for (String s : include) { - if (name.matches(s)) - return false; - } - return include.size() > 0; - } - } - - class MessageIterator implements Iterator { - private Folder folder; - private Message[] messagesInCurBatch; - private int current = 0; - private int currentBatch = 0; - private int batchSize = 0; - private int totalInFolder = 0; - private boolean doBatching = true; - - public MessageIterator(Folder folder, int batchSize) { - try { - this.folder = folder; - this.batchSize = batchSize; - SearchTerm st = getSearchTerm(); - if (st != null) { - doBatching = false; - messagesInCurBatch = folder.search(st); - totalInFolder = messagesInCurBatch.length; - folder.fetch(messagesInCurBatch, fp); - current = 0; - LOG.info("Total messages : " + totalInFolder); - LOG.info("Search criteria applied. 
Batching disabled"); - } else { - totalInFolder = folder.getMessageCount(); - LOG.info("Total messages : " + totalInFolder); - getNextBatch(batchSize, folder); - } - } catch (MessagingException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Message retreival failed", e); - } - } - - private void getNextBatch(int batchSize, Folder folder) throws MessagingException { - // after each batch invalidate cache - if (messagesInCurBatch != null) { - for (Message m : messagesInCurBatch) { - if (m instanceof IMAPMessage) - ((IMAPMessage) m).invalidateHeaders(); - } - } - int lastMsg = (currentBatch + 1) * batchSize; - lastMsg = lastMsg > totalInFolder ? totalInFolder : lastMsg; - messagesInCurBatch = folder.getMessages(currentBatch * batchSize + 1, lastMsg); - folder.fetch(messagesInCurBatch, fp); - current = 0; - currentBatch++; - LOG.info("Current Batch : " + currentBatch); - LOG.info("Messages in this batch : " + messagesInCurBatch.length); - } - - public boolean hasNext() { - boolean hasMore = current < messagesInCurBatch.length; - if (!hasMore && doBatching - && currentBatch * batchSize < totalInFolder) { - // try next batch - try { - getNextBatch(batchSize, folder); - hasMore = current < messagesInCurBatch.length; - } catch (MessagingException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Message retreival failed", e); - } - } - return hasMore; - } - - public Message next() { - return hasNext() ? messagesInCurBatch[current++] : null; - } - - public void remove() { - throw new UnsupportedOperationException("Its read only mode..."); - } - - private SearchTerm getSearchTerm() { - if (filters.size() == 0) - return null; - if (filters.size() == 1) - return filters.get(0).getCustomSearch(folder); - SearchTerm last = filters.get(0).getCustomSearch(folder); - for (int i = 1; i < filters.size(); i++) { - CustomFilter filter = filters.get(i); - SearchTerm st = filter.getCustomSearch(folder); - if (st != null) { - last = new AndTerm(last, st); - } - } - return last; - } - } - - class MailsSinceLastCheckFilter implements CustomFilter { - - private Date since; - - public MailsSinceLastCheckFilter(Date date) { - since = date; - } - - public SearchTerm getCustomSearch(Folder folder) { - return new ReceivedDateTerm(ComparisonTerm.GE, since); - } - } - - // user settings stored in member variables - private String user; - private String password; - private String host; - private String protocol; - - private String folderNames; - private List exclude = new ArrayList(); - private List include = new ArrayList(); - private boolean recurse; - - private int batchSize; - private int fetchSize; - private int cTimeout; - private int rTimeout; - - private Date fetchMailsSince; - private String customFilter; - - private boolean processAttachment = true; - - // holds the current state - private Store mailbox; - private boolean connected = false; - private FolderIterator folderIter; - private MessageIterator msgIter; - private List filters = new ArrayList(); - private static FetchProfile fp = new FetchProfile(); - private static final Logger LOG = LoggerFactory.getLogger(DataImporter.class); - - // diagnostics - private int rowCount = 0; - - static { - fp.add(FetchProfile.Item.ENVELOPE); - fp.add(FetchProfile.Item.FLAGS); - fp.add("X-Mailer"); - } - - // Fields To Index - // single valued - private static final String MESSAGE_ID = "messageId"; - private static final String SUBJECT = "subject"; - private static final String FROM = "from"; - private static 
final String SENT_DATE = "sentDate"; - private static final String XMAILER = "xMailer"; - // multi valued - private static final String TO_CC_BCC = "allTo"; - private static final String FLAGS = "flags"; - private static final String CONTENT = "content"; - private static final String ATTACHMENT = "attachment"; - private static final String ATTACHMENT_NAMES = "attachmentNames"; - // flag values - private static final String FLAG_ANSWERED = "answered"; - private static final String FLAG_DELETED = "deleted"; - private static final String FLAG_DRAFT = "draft"; - private static final String FLAG_FLAGGED = "flagged"; - private static final String FLAG_RECENT = "recent"; - private static final String FLAG_SEEN = "seen"; - - private int getIntFromContext(String prop, int ifNull) { - int v = ifNull; - try { - String val = context.getEntityAttribute(prop); - if (val != null) { - val = context.getVariableResolver().replaceTokens(val); - v = Integer.valueOf(val); - } - } catch (NumberFormatException e) { - //do nothing - } - return v; - } - - private boolean getBoolFromContext(String prop, boolean ifNull) { - boolean v = ifNull; - String val = context.getEntityAttribute(prop); - if (val != null) { - val = context.getVariableResolver().replaceTokens(val); - v = Boolean.valueOf(val); - } - return v; - } - - private String getStringFromContext(String prop, String ifNull) { - String v = ifNull; - String val = context.getEntityAttribute(prop); - if (val != null) { - val = context.getVariableResolver().replaceTokens(val); - v = val; - } - return v; - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,211 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import junit.framework.Assert; -import org.apache.solr.common.SolrInputDocument; -import org.junit.Ignore; -import org.junit.Test; - -import java.text.ParseException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -// Test mailbox is like this: foldername(mailcount) -// top1(2) -> child11(6) -// -> child12(0) -// top2(2) -> child21(1) -// -> grandchild211(2) -// -> grandchild212(1) -// -> child22(2) - -/** - * Test for MailEntityProcessor. 
The tests are marked as ignored because we'd need a mail server (real or mocked) for - * these to work. - * - * TODO: Find a way to make the tests actually test code - * - * @version $Id: TestMailEntityProcessor.java 826074 2009-10-16 20:34:16Z shalin $ - * @see org.apache.solr.handler.dataimport.MailEntityProcessor - * @since solr 1.4 - */ -public class TestMailEntityProcessor { - - // Credentials - private static final String user = "user"; - private static final String password = "password"; - private static final String host = "host"; - private static final String protocol = "imaps"; - - private static Map paramMap = new HashMap(); - - @Test - @Ignore - public void testConnection() { - // also tests recurse = false and default settings - paramMap.put("folders", "top2"); - paramMap.put("recurse", "false"); - paramMap.put("processAttachement", "false"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - DataConfig.Entity ent = di.getConfig().document.entities.get(0); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals("top1 did not return 2 messages", swi.docs.size(), 2); - } - - @Test - @Ignore - public void testRecursion() { - paramMap.put("folders", "top2"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - DataConfig.Entity ent = di.getConfig().document.entities.get(0); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals("top2 and its children did not return 8 messages", swi.docs.size(), 8); - } - - @Test - @Ignore - public void testExclude() { - paramMap.put("folders", "top2"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - paramMap.put("exclude", ".*grandchild.*"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - DataConfig.Entity ent = di.getConfig().document.entities.get(0); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals("top2 and its direct children did not return 5 messages", swi.docs.size(), 5); - } - - @Test - @Ignore - public void testInclude() { - paramMap.put("folders", "top2"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - paramMap.put("include", ".*grandchild.*"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - DataConfig.Entity ent = di.getConfig().document.entities.get(0); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3); - } - - @Test - @Ignore - public void testIncludeAndExclude() { - paramMap.put("folders", "top1,top2"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - paramMap.put("exclude", ".*top1.*"); - paramMap.put("include", ".*grandchild.*"); - DataImporter di = new DataImporter(); - 
di.loadAndInit(getConfigFromMap(paramMap)); - DataConfig.Entity ent = di.getConfig().document.entities.get(0); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3); - } - - @Test - @Ignore - public void testFetchTimeSince() throws ParseException { - paramMap.put("folders", "top1/child11"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - paramMap.put("fetchMailsSince", "2008-12-26 00:00:00"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - DataConfig.Entity ent = di.getConfig().document.entities.get(0); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3); - } - - private String getConfigFromMap(Map params) { - String conf = - "" + - "" + - "" + - "" + - ""; - params.put("user", user); - params.put("password", password); - params.put("host", host); - params.put("protocol", protocol); - StringBuilder attribs = new StringBuilder(""); - for (String key : params.keySet()) - attribs.append(" ").append(key).append("=" + "\"").append(params.get(key)).append("\""); - attribs.append(" "); - return conf.replace("someconfig", attribs.toString()); - } - - static class SolrWriterImpl extends SolrWriter { - List docs = new ArrayList(); - Boolean deleteAllCalled; - Boolean commitCalled; - - public SolrWriterImpl() { - super(null, "."); - } - - public boolean upload(SolrInputDocument doc) { - return docs.add(doc); - } - - public void log(int event, String name, Object row) { - // Do nothing - } - - public void doDeleteAll() { - deleteAllCalled = Boolean.TRUE; - } - - public void commit(boolean b) { - commitCalled = Boolean.TRUE; - } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTest.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTest.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTest.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTest.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,237 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.core.SolrCore; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.util.AbstractSolrTestCase; -import org.apache.solr.common.util.NamedList; - -import java.io.IOException; -import java.io.File; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - *
- * Abstract base class for DataImportHandler tests - *
- *
- * This API is experimental and subject to change - * - * @version $Id: AbstractDataImportHandlerTest.java 824910 2009-10-13 20:28:04Z ehatcher $ - * @since solr 1.3 - */ -public abstract class AbstractDataImportHandlerTest extends - AbstractSolrTestCase { - - @Override - public void setUp() throws Exception { - super.setUp(); - } - - @Override - public void tearDown() throws Exception { - // remove dataimport.properties - File f = new File("solr/conf/dataimport.properties"); - log.info("Looking for dataimport.properties at: " + f.getAbsolutePath()); - if (f.exists()) { - log.info("Deleting dataimport.properties"); - if (!f.delete()) - log.warn("Could not delete dataimport.properties"); - } - super.tearDown(); - } - - protected String loadDataConfig(String dataConfigFileName) { - try { - SolrCore core = h.getCore(); - return SolrWriter.getResourceAsString(core.getResourceLoader() - .openResource(dataConfigFileName)); - } catch (IOException e) { - e.printStackTrace(); - return null; - } - } - - protected void runFullImport(String dataConfig) throws Exception { - LocalSolrQueryRequest request = lrf.makeRequest("command", "full-import", - "debug", "on", "clean", "true", "commit", "true", "dataConfig", - dataConfig); - h.query("/dataimport", request); - } - - protected void runDeltaImport(String dataConfig) throws Exception { - LocalSolrQueryRequest request = lrf.makeRequest("command", "delta-import", - "debug", "on", "clean", "false", "commit", "true", "dataConfig", - dataConfig); - h.query("/dataimport", request); - } - - /** - * Runs a full-import using the given dataConfig and the provided request parameters. - * - * By default, debug=on, clean=true and commit=true are passed which can be overridden. - * - * @param dataConfig the data-config xml as a string - * @param extraParams any extra request parameters needed to be passed to DataImportHandler - * @throws Exception in case of any error - */ - protected void runFullImport(String dataConfig, Map extraParams) throws Exception { - HashMap params = new HashMap(); - params.put("command", "full-import"); - params.put("debug", "on"); - params.put("dataConfig", dataConfig); - params.put("clean", "true"); - params.put("commit", "true"); - params.putAll(extraParams); - NamedList l = new NamedList(); - for (Map.Entry e : params.entrySet()) { - l.add(e.getKey(),e.getValue()); - } - LocalSolrQueryRequest request = new LocalSolrQueryRequest(h.getCore(), l); - h.query("/dataimport", request); - } - - /** - * Helper for creating a Context instance. Useful for testing Transformers - */ - @SuppressWarnings("unchecked") - public static TestContext getContext(DataConfig.Entity parentEntity, - VariableResolverImpl resolver, DataSource parentDataSource, - String currProcess, final List> entityFields, - final Map entityAttrs) { - if (resolver == null) resolver = new VariableResolverImpl(); - final Context delegate = new ContextImpl(parentEntity, resolver, - parentDataSource, currProcess, - new HashMap(), null, null); - return new TestContext(entityAttrs, delegate, entityFields, parentEntity == null); - } - - /** - * Strings at even index are keys, odd-index strings are values in the - * returned map - */ - @SuppressWarnings("unchecked") - public static Map createMap(Object... 
args) { - Map result = new HashMap(); - - if (args == null || args.length == 0) - return result; - - for (int i = 0; i < args.length - 1; i += 2) - result.put(args[i], args[i + 1]); - - return result; - } - - static class TestContext extends Context { - private final Map entityAttrs; - private final Context delegate; - private final List> entityFields; - private final boolean root; - String script,scriptlang; - - public TestContext(Map entityAttrs, Context delegate, - List> entityFields, boolean root) { - this.entityAttrs = entityAttrs; - this.delegate = delegate; - this.entityFields = entityFields; - this.root = root; - } - - public String getEntityAttribute(String name) { - return entityAttrs == null ? delegate.getEntityAttribute(name) : entityAttrs.get(name); - } - - public String getResolvedEntityAttribute(String name) { - return entityAttrs == null ? delegate.getResolvedEntityAttribute(name) : - delegate.getVariableResolver().replaceTokens(entityAttrs.get(name)); - } - - public List> getAllEntityFields() { - return entityFields == null ? delegate.getAllEntityFields() - : entityFields; - } - - public VariableResolver getVariableResolver() { - return delegate.getVariableResolver(); - } - - public DataSource getDataSource() { - return delegate.getDataSource(); - } - - public boolean isRootEntity() { - return root; - } - - public String currentProcess() { - return delegate.currentProcess(); - } - - public Map getRequestParameters() { - return delegate.getRequestParameters(); - } - - public EntityProcessor getEntityProcessor() { - return null; - } - - public void setSessionAttribute(String name, Object val, String scope) { - delegate.setSessionAttribute(name, val, scope); - } - - public Object getSessionAttribute(String name, String scope) { - return delegate.getSessionAttribute(name, scope); - } - - public Context getParentContext() { - return delegate.getParentContext(); - } - - public DataSource getDataSource(String name) { - return delegate.getDataSource(name); - } - - public SolrCore getSolrCore() { - return delegate.getSolrCore(); - } - - public Map getStats() { - return delegate.getStats(); - } - - - public String getScript() { - return script == null ? delegate.getScript() : script; - } - - public String getScriptLanguage() { - return scriptlang == null ? delegate.getScriptLanguage() : scriptlang; - } - - public void deleteDoc(String id) { - - } - - public void deleteDocByQuery(String query) { - - } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,79 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * This class enables caching of data obtained from the DB to avoid too many sql - * queries - *
- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *
- * This API is experimental and subject to change - * - * @version $Id: CachedSqlEntityProcessor.java 766608 2009-04-20 07:36:55Z shalin $ - * @since solr 1.3 - */ -public class CachedSqlEntityProcessor extends SqlEntityProcessor { - private boolean isFirst; - - @SuppressWarnings("unchecked") - public void init(Context context) { - super.init(context); - super.cacheInit(); - isFirst = true; - } - - public Map nextRow() { - if (dataSourceRowCache != null) - return getFromRowCacheTransformed(); - if (!isFirst) - return null; - String query = resolver.replaceTokens(context.getEntityAttribute("query")); - isFirst = false; - if (simpleCache != null) { - return getSimpleCacheData(query); - } else { - return getIdCacheData(query); - } - - } - - protected List> getAllNonCachedRows() { - List> rows = new ArrayList>(); - String q = getQuery(); - initQuery(resolver.replaceTokens(q)); - if (rowIterator == null) - return rows; - while (rowIterator.hasNext()) { - Map arow = rowIterator.next(); - if (arow == null) { - break; - } else { - rows.add(arow); - } - } - return rows; - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,84 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.HTMLStripTransformer.TRUE; - -import java.io.IOException; -import java.io.Reader; -import java.sql.Clob; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * Transformer instance which converts a Clob to a String. - *
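A hedged sketch of how this transformer is enabled in data-config.xml; the entity and column names are invented. Only fields flagged clob="true" are touched, as the transformRow loop below shows.

    public class ClobFieldExample {
        // Sketch only: transformer="ClobTransformer" on the entity plus
        // clob="true" on a field replaces that column's java.sql.Clob value
        // with the String read from it.
        static final String DATA_CONFIG_FRAGMENT =
            "<entity name=\"doc\" transformer=\"ClobTransformer\"\n"
          + "        query=\"select id, body from docs\">\n"
          + "  <field column=\"body\" clob=\"true\"/>\n"
          + "</entity>";
    }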
- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *
- * This API is experimental and subject to change - * - * @version $Id: ClobTransformer.java 751993 2009-03-10 06:26:23Z shalin $ - * @since solr 1.4 - */ -public class ClobTransformer extends Transformer { - public Object transformRow(Map aRow, Context context) { - for (Map map : context.getAllEntityFields()) { - if (!TRUE.equals(map.get(CLOB))) continue; - String column = map.get(DataImporter.COLUMN); - String srcCol = map.get(RegexTransformer.SRC_COL_NAME); - if (srcCol == null) - srcCol = column; - Object o = aRow.get(srcCol); - if (o instanceof List) { - List inputs = (List) o; - List results = new ArrayList(); - for (Object input : inputs) { - if (input instanceof Clob) { - Clob clob = (Clob) input; - results.add(readFromClob(clob)); - } - } - aRow.put(column, results); - } else { - if (o instanceof Clob) { - Clob clob = (Clob) o; - aRow.put(column, readFromClob(clob)); - } - } - } - return aRow; - } - - private String readFromClob(Clob clob) { - Reader reader = FieldReaderDataSource.readCharStream(clob); - StringBuilder sb = new StringBuilder(); - char[] buf = new char[1024]; - int len; - try { - while ((len = reader.read(buf)) != -1) { - sb.append(buf, 0, len); - } - } catch (IOException e) { - DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e); - } - return sb.toString(); - } - - public static final String CLOB = "clob"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.util.ContentStream; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; - -import java.io.IOException; -import java.io.Reader; -import java.util.Properties; - -/** - * A DataSource implementation which reads from the ContentStream of a POST request - *
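A hedged client-side sketch, assuming a default local install (host, port and payload are invented): whatever is POSTed as the request body becomes the ContentStream that getData() below hands back as a Reader, and the handler code later in this diff runs such requests synchronously.

    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class PostBodyExample {
        public static void main(String[] args) throws Exception {
            byte[] body = "<root><doc><id>1</id></doc></root>".getBytes("UTF-8");
            URL url = new URL("http://localhost:8983/solr/dataimport?command=full-import");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("POST");
            conn.setDoOutput(true);
            conn.setRequestProperty("Content-Type", "text/xml; charset=utf-8");
            OutputStream out = conn.getOutputStream();
            out.write(body);   // this body is what ContentStreamDataSource reads
            out.close();
            System.out.println("HTTP " + conn.getResponseCode());
        }
    }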
- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *
- * This API is experimental and may change in the future. - * - * @version $Id: ContentStreamDataSource.java 755141 2009-03-17 07:50:09Z shalin $ - * @since solr 1.4 - */ -public class ContentStreamDataSource extends DataSource { - private ContextImpl context; - private ContentStream contentStream; - private Reader reader; - - public void init(Context context, Properties initProps) { - this.context = (ContextImpl) context; - } - - public Reader getData(String query) { - contentStream = context.docBuilder.requestParameters.contentStream; - if (contentStream == null) - throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body"); - try { - return reader = contentStream.getReader(); - } catch (IOException e) { - DataImportHandlerException.wrapAndThrow(SEVERE, e); - return null; - } - } - - public void close() { - if (contentStream != null) { - try { - if (reader == null) reader = contentStream.getReader(); - reader.close(); - } catch (IOException e) { - } - } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,216 +0,0 @@ -package org.apache.solr.handler.dataimport; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -import org.apache.solr.core.SolrCore; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - *
- * An implementation of the Context
- *
- * This API is experimental and subject to change - * - * @version $Id: ContextImpl.java 769058 2009-04-27 16:55:46Z shalin $ - * @since solr 1.3 - */ -public class ContextImpl extends Context { - private DataConfig.Entity entity; - - private ContextImpl parent; - - private VariableResolverImpl resolver; - - private DataSource ds; - - private String currProcess; - - private Map requestParams; - - private DataImporter dataImporter; - - private Map entitySession, globalSession; - - DocBuilder.DocWrapper doc; - - DocBuilder docBuilder; - - - public ContextImpl(DataConfig.Entity entity, VariableResolverImpl resolver, - DataSource ds, String currProcess, - Map global, ContextImpl parentContext, DocBuilder docBuilder) { - this.entity = entity; - this.docBuilder = docBuilder; - this.resolver = resolver; - this.ds = ds; - this.currProcess = currProcess; - if (docBuilder != null) { - this.requestParams = docBuilder.requestParameters.requestParams; - dataImporter = docBuilder.dataImporter; - } - globalSession = global; - parent = parentContext; - } - - public String getEntityAttribute(String name) { - return entity == null ? null : entity.allAttributes.get(name); - } - - public String getResolvedEntityAttribute(String name) { - return entity == null ? null : resolver.replaceTokens(entity.allAttributes.get(name)); - } - - public List> getAllEntityFields() { - return entity == null ? Collections.EMPTY_LIST : entity.allFieldsList; - } - - public VariableResolver getVariableResolver() { - return resolver; - } - - public DataSource getDataSource() { - if (ds != null) return ds; - if (entity.dataSrc == null) { - entity.dataSrc = dataImporter.getDataSourceInstance(entity, entity.dataSource, this); - } - if (entity.dataSrc != null && docBuilder != null && docBuilder.verboseDebug && - Context.FULL_DUMP.equals(currentProcess())) { - //debug is not yet implemented properly for deltas - entity.dataSrc = docBuilder.writer.getDebugLogger().wrapDs(entity.dataSrc); - } - return entity.dataSrc; - } - - public DataSource getDataSource(String name) { - return dataImporter.getDataSourceInstance(entity, name, this); - } - - public boolean isRootEntity() { - return entity.isDocRoot; - } - - public String currentProcess() { - return currProcess; - } - - public Map getRequestParameters() { - return requestParams; - } - - public EntityProcessor getEntityProcessor() { - return entity == null ? null : entity.processor; - } - - public void setSessionAttribute(String name, Object val, String scope) { - if (Context.SCOPE_ENTITY.equals(scope)) { - if (entitySession == null) - entitySession = new HashMap(); - entitySession.put(name, val); - } else if (Context.SCOPE_GLOBAL.equals(scope)) { - if (globalSession != null) { - globalSession.put(name, val); - } - } else if (Context.SCOPE_DOC.equals(scope)) { - DocBuilder.DocWrapper doc = getDocument(); - if (doc != null) - doc.setSessionAttribute(name, val); - } else if (SCOPE_SOLR_CORE.equals(scope)){ - if(dataImporter != null) dataImporter.getCoreScopeSession().put(name, val); - } - } - - public Object getSessionAttribute(String name, String scope) { - if (Context.SCOPE_ENTITY.equals(scope)) { - if (entitySession == null) - return null; - return entitySession.get(name); - } else if (Context.SCOPE_GLOBAL.equals(scope)) { - if (globalSession != null) { - return globalSession.get(name); - } - } else if (Context.SCOPE_DOC.equals(scope)) { - DocBuilder.DocWrapper doc = getDocument(); - return doc == null ? 
null: doc.getSessionAttribute(name); - } else if (SCOPE_SOLR_CORE.equals(scope)){ - return dataImporter == null ? null : dataImporter.getCoreScopeSession().get(name); - } - return null; - } - - public Context getParentContext() { - return parent; - } - - private DocBuilder.DocWrapper getDocument() { - ContextImpl c = this; - while (true) { - if (c.doc != null) - return c.doc; - if (c.parent != null) - c = c.parent; - else - return null; - } - } - - public void setDoc(DocBuilder.DocWrapper docWrapper) { - this.doc = docWrapper; - } - - - public SolrCore getSolrCore() { - return dataImporter == null ? null : dataImporter.getCore(); - } - - - public Map getStats() { - return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.emptyMap(); - } - - public String getScript() { - if(dataImporter != null) { - DataConfig.Script script = dataImporter.getConfig().script; - return script == null ? null : script.text; - } - return null; - } - - public String getScriptLanguage() { - if (dataImporter != null) { - DataConfig.Script script = dataImporter.getConfig().script; - return script == null ? null : script.language; - } - return null; - } - - public void deleteDoc(String id) { - if(docBuilder != null){ - docBuilder.writer.deleteDoc(id); - } - } - - public void deleteDocByQuery(String query) { - if(docBuilder != null){ - docBuilder.writer.deleteByQuery(query); - } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Context.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Context.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Context.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Context.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,212 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.handler.dataimport; - -import org.apache.solr.core.SolrCore; - -import java.util.List; -import java.util.Map; - -/** - *
- * This abstract class gives access to all available objects. So any - * component implemented by a user can have the full power of DataImportHandler - *
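A hedged sketch of that access from a custom transformer; the idPrefix attribute and the session key are invented names, not part of the API.

    import java.util.Map;

    import org.apache.solr.handler.dataimport.Context;
    import org.apache.solr.handler.dataimport.Transformer;

    public class PrefixTransformer extends Transformer {
        public Object transformRow(Map<String, Object> row, Context context) {
            // read an arbitrary attribute declared on this <entity> tag
            String prefix = context.getEntityAttribute("idPrefix");
            if (prefix != null && row.get("id") != null)
                row.put("id", prefix + row.get("id"));
            // keep a counter that survives across documents for this import run
            Long seen = (Long) context.getSessionAttribute("seen", Context.SCOPE_GLOBAL);
            context.setSessionAttribute("seen",
                Long.valueOf(seen == null ? 1L : seen.longValue() + 1),
                Context.SCOPE_GLOBAL);
            return row;
        }
    }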
- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *
- * This API is experimental and subject to change - * - * @version $Id: Context.java 766608 2009-04-20 07:36:55Z shalin $ - * @since solr 1.3 - */ -public abstract class Context { - public static final String FULL_DUMP = "FULL_DUMP", DELTA_DUMP = "DELTA_DUMP", FIND_DELTA = "FIND_DELTA"; - - /** - * An object stored in entity scope is valid only for the current entity for the current document only. - */ - public static final String SCOPE_ENTITY = "entity"; - - /** - * An object stored in global scope is available for the current import only but across entities and documents. - */ - public static final String SCOPE_GLOBAL = "global"; - - /** - * An object stored in document scope is available for the current document only but across entities. - */ - public static final String SCOPE_DOC = "document"; - - /** - * An object stored in 'solrcore' scope is available across imports, entities and documents throughout the life of - * a solr core. A solr core unload or reload will destroy this data. - */ - public static final String SCOPE_SOLR_CORE = "solrcore"; - - /** - * Get the value of any attribute put into this entity - * - * @param name name of the attribute eg: 'name' - * @return value of named attribute in entity - */ - public abstract String getEntityAttribute(String name); - - /** - * Get the value of any attribute put into this entity after resolving all variables found in the attribute value - * @param name name of the attribute - * @return value of the named attribute after resolving all variables - */ - public abstract String getResolvedEntityAttribute(String name); - - /** - * Returns all the fields put into an entity. each item (which is a map ) in - * the list corresponds to one field. each if the map contains the attribute - * names and values in a field - * - * @return all fields in an entity - */ - public abstract List> getAllEntityFields(); - - /** - * Returns the VariableResolver used in this entity which can be used to - * resolve the tokens in ${} - * - * @return a VariableResolver instance - * @see org.apache.solr.handler.dataimport.VariableResolver - */ - - public abstract VariableResolver getVariableResolver(); - - /** - * Gets the datasource instance defined for this entity. Do not close() this instance. - * Transformers should use the getDataSource(String name) method. - * - * @return a new DataSource instance as configured for the current entity - * @see org.apache.solr.handler.dataimport.DataSource - * @see #getDataSource(String) - */ - public abstract DataSource getDataSource(); - - /** - * Gets a new DataSource instance with a name. Ensure that you close() this after use - * because this is created just for this method call. 
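A hedged sketch of the close() discipline this implies, assuming the named source is a JDBC one whose getData() yields an iterator of row maps; the name "db2" and the query are invented.

    import java.util.Iterator;
    import java.util.Map;

    import org.apache.solr.handler.dataimport.Context;
    import org.apache.solr.handler.dataimport.DataSource;

    public class NamedDataSourceExample {
        @SuppressWarnings("unchecked")
        static void drain(Context context) {
            // created for this call alone, so it must be closed here
            DataSource<Iterator<Map<String, Object>>> ds =
                (DataSource<Iterator<Map<String, Object>>>) context.getDataSource("db2");
            try {
                Iterator<Map<String, Object>> rows = ds.getData("select id from archived");
                while (rows != null && rows.hasNext())
                    rows.next(); // consume each row
            } finally {
                ds.close();
            }
        }
    }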
- * - * @param name Name of the dataSource as defined in the dataSource tag - * @return a new DataSource instance - * @see org.apache.solr.handler.dataimport.DataSource - */ - public abstract DataSource getDataSource(String name); - - /** - * Returns the instance of EntityProcessor used for this entity - * - * @return instance of EntityProcessor used for the current entity - * @see org.apache.solr.handler.dataimport.EntityProcessor - */ - public abstract EntityProcessor getEntityProcessor(); - - /** - * Store values in a certain name and scope (entity, document,global) - * - * @param name the key - * @param val the value - * @param scope the scope in which the given key, value pair is to be stored - */ - public abstract void setSessionAttribute(String name, Object val, String scope); - - /** - * get a value by name in the given scope (entity, document,global) - * - * @param name the key - * @param scope the scope from which the value is to be retreived - * @return the object stored in the given scope with the given key - */ - public abstract Object getSessionAttribute(String name, String scope); - - /** - * Get the context instance for the parent entity. works only in the full dump - * If the current entity is rootmost a null is returned - * - * @return parent entity's Context - */ - public abstract Context getParentContext(); - - /** - * The request parameters passed over HTTP for this command the values in the - * map are either String(for single valued parameters) or List (for - * multi-valued parameters) - * - * @return the request parameters passed in the URL to initiate this process - */ - public abstract Map getRequestParameters(); - - /** - * Returns if the current entity is the root entity - * - * @return true if current entity is the root entity, false otherwise - */ - public abstract boolean isRootEntity(); - - /** - * Returns the current process FULL_DUMP, DELTA_DUMP, FIND_DELTA - * - * @return the type of the current running process - */ - public abstract String currentProcess(); - - /** - * Exposing the actual SolrCore to the components - * - * @return the core - */ - public abstract SolrCore getSolrCore(); - - /** - * Makes available some basic running statistics such as "docCount", - * "deletedDocCount", "rowCount", "queryCount" and "skipDocCount" - * - * @return a Map containing running statistics of the current import - */ - public abstract Map getStats(); - - /** - * Returns the text specified in the script tag in the data-config.xml - */ - public abstract String getScript(); - - /** - * Returns the language of the script as specified in the script tag in data-config.xml - */ - public abstract String getScriptLanguage(); - - /**delete a document by id - * @param id - */ - public abstract void deleteDoc(String id); - - /**delete documents by query - * @param query - */ - public abstract void deleteDocByQuery(String query); - - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,367 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.w3c.dom.Element; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.apache.solr.schema.SchemaField; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; - -/** - *
- * Mapping for data-config.xml - *
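For orientation, a minimal configuration of the shape this class parses; the driver, URL and table are placeholders, while the element names (dataConfig, dataSource, document, entity, field) are the ones readFromXml and the Entity/Field constructors below actually look for.

    public class DataConfigExample {
        // Sketch only: connection details and table/column names are hypothetical.
        static final String MINIMAL_DATA_CONFIG =
            "<dataConfig>\n"
          + "  <dataSource driver=\"org.hsqldb.jdbcDriver\" url=\"jdbc:hsqldb:/tmp/ex\" user=\"sa\"/>\n"
          + "  <document>\n"
          + "    <entity name=\"item\" query=\"select * from item\">\n"
          + "      <field column=\"NAME\" name=\"name\"/>\n"
          + "    </entity>\n"
          + "  </document>\n"
          + "</dataConfig>";
    }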
- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *
- * This API is experimental and subject to change - * - * @version $Id: DataConfig.java 807537 2009-08-25 09:54:26Z noble $ - * @since solr 1.3 - */ -public class DataConfig { - static final Logger LOG = LoggerFactory.getLogger(DataConfig.class); - - public Document document; - - public List> functions = new ArrayList>(); - - public Script script; - - public Map dataSources = new HashMap(); - - public Map lowerNameVsSchemaField = new HashMap(); - - public static class Document { - // TODO - remove from here and add it to entity - public String deleteQuery; - - public List entities = new ArrayList(); - - public String onImportStart, onImportEnd; - - public Document() { - } - - public Document(Element element) { - this.deleteQuery = getStringAttribute(element, "deleteQuery", null); - this.onImportStart = getStringAttribute(element, "onImportStart", null); - this.onImportEnd = getStringAttribute(element, "onImportEnd", null); - List l = getChildNodes(element, "entity"); - for (Element e : l) - entities.add(new Entity(e)); - } - } - - public static class Entity { - public String name; - - public String pk; - - public String pkMappingFromSchema; - - public String dataSource; - - public Map allAttributes; - - public String proc; - - public String docRoot; - - public boolean isDocRoot = false; - - public List fields = new ArrayList(); - - public List> allFieldsList = new ArrayList>(); - - public List entities; - - public String[] primaryKeys; - - public Entity parentEntity; - - public EntityProcessorWrapper processor; - - @SuppressWarnings("unchecked") - public DataSource dataSrc; - - public Script script; - - public Map> colNameVsField = new HashMap>(); - - public Entity() { - } - - public Entity(Element element) { - name = getStringAttribute(element, NAME, null); - if(name == null){ - LOG.warn("Entity does not have a name"); - name= ""+System.nanoTime(); - } - if(name.indexOf(".") != -1){ - throw new DataImportHandlerException(SEVERE, "Entity name must not have period (.): '" + name); - } - if (RESERVED_WORDS.contains(name)) { - throw new DataImportHandlerException(SEVERE, "Entity name : '" + name - + "' is a reserved keyword. Reserved words are: " + RESERVED_WORDS); - } - pk = getStringAttribute(element, "pk", null); - docRoot = getStringAttribute(element, ROOT_ENTITY, null); - proc = getStringAttribute(element, PROCESSOR, null); - dataSource = getStringAttribute(element, DataImporter.DATA_SRC, null); - allAttributes = getAllAttributes(element); - List n = getChildNodes(element, "field"); - for (Element elem : n) { - Field field = new Field(elem); - fields.add(field); - List l = colNameVsField.get(field.column); - if(l == null) l = new ArrayList(); - boolean alreadyFound = false; - for (Field f : l) { - if(f.getName().equals(field.getName())) { - alreadyFound = true; - break; - } - } - if(!alreadyFound) l.add(field); - colNameVsField.put(field.column, l); - } - n = getChildNodes(element, "entity"); - if (!n.isEmpty()) - entities = new ArrayList(); - for (Element elem : n) - entities.add(new Entity(elem)); - - } - - public void clearCache() { - if (entities != null) { - for (Entity entity : entities) - entity.clearCache(); - } - try { - processor.close(); - } catch (Exception e) { - /*no op*/ - } - processor = null; - if (dataSrc != null) - dataSrc.close(); - dataSrc = null; - } - - public String getPk(){ - return pk == null ? 
pkMappingFromSchema : pk; - } - } - - public static class Script { - public String language; - - public String text; - - public Script() { - } - - public Script(Element e) { - this.language = getStringAttribute(e, "language", "JavaScript"); - StringBuilder buffer = new StringBuilder(); - String script = getTxt(e, buffer); - if (script != null) - this.text = script.trim(); - } - } - - public static class Field { - - public String column; - - public String name; - - public Float boost = 1.0f; - - public boolean toWrite = true; - - public boolean multiValued = false; - - boolean dynamicName; - - - public Map allAttributes = new HashMap() { - public String put(String key, String value) { - if (super.containsKey(key)) - return super.get(key); - return super.put(key, value); - } - }; - - public Field() { - } - - public Field(Element e) { - this.name = getStringAttribute(e, DataImporter.NAME, null); - this.column = getStringAttribute(e, DataImporter.COLUMN, null); - if (column == null) { - throw new DataImportHandlerException(SEVERE, "Field must have a column attribute"); - } - this.boost = Float.parseFloat(getStringAttribute(e, "boost", "1.0f")); - allAttributes.putAll(getAllAttributes(e)); - } - - public String getName() { - return name == null ? column : name; - } - - public Entity entity; - - } - - public void readFromXml(Element e) { - List n = getChildNodes(e, "document"); - if (n.isEmpty()) { - throw new DataImportHandlerException(SEVERE, "DataImportHandler " + - "configuration file must have one node."); - } - document = new Document(n.get(0)); - - n = getChildNodes(e, SCRIPT); - if (!n.isEmpty()) { - script = new Script(n.get(0)); - } - - // Add the provided evaluators - n = getChildNodes(e, FUNCTION); - if (!n.isEmpty()) { - for (Element element : n) { - String func = getStringAttribute(element, NAME, null); - String clz = getStringAttribute(element, CLASS, null); - if (func == null || clz == null){ - throw new DataImportHandlerException( - SEVERE, - " must have a 'name' and 'class' attributes"); - } else { - functions.add(getAllAttributes(element)); - } - } - } - n = getChildNodes(e, DATA_SRC); - if (!n.isEmpty()) { - for (Element element : n) { - Properties p = new Properties(); - HashMap attrs = getAllAttributes(element); - for (Map.Entry entry : attrs.entrySet()) { - p.setProperty(entry.getKey(), entry.getValue()); - } - dataSources.put(p.getProperty("name"), p); - } - } - } - - private static String getStringAttribute(Element e, String name, String def) { - String r = e.getAttribute(name); - if (r == null || "".equals(r.trim())) - r = def; - return r; - } - - private static HashMap getAllAttributes(Element e) { - HashMap m = new HashMap(); - NamedNodeMap nnm = e.getAttributes(); - for (int i = 0; i < nnm.getLength(); i++) { - m.put(nnm.item(i).getNodeName(), nnm.item(i).getNodeValue()); - } - return m; - } - - public static String getTxt(Node elem, StringBuilder buffer) { - if (elem.getNodeType() != Node.CDATA_SECTION_NODE) { - NodeList childs = elem.getChildNodes(); - for (int i = 0; i < childs.getLength(); i++) { - Node child = childs.item(i); - short childType = child.getNodeType(); - if (childType != Node.COMMENT_NODE - && childType != Node.PROCESSING_INSTRUCTION_NODE) { - getTxt(child, buffer); - } - } - } else { - buffer.append(elem.getNodeValue()); - } - - return buffer.toString(); - } - - public static List getChildNodes(Element e, String byName) { - List result = new ArrayList(); - NodeList l = e.getChildNodes(); - for (int i = 0; i < l.getLength(); i++) { - if 
(e.equals(l.item(i).getParentNode()) - && byName.equals(l.item(i).getNodeName())) - result.add((Element) l.item(i)); - } - return result; - } - - public void clearCaches() { - for (Entity entity : document.entities) - entity.clearCache(); - } - - public static final String SCRIPT = "script"; - - public static final String NAME = "name"; - - public static final String PROCESSOR = "processor"; - - /** - * @deprecated use IMPORTER_NS_SHORT instead - */ - @Deprecated - public static final String IMPORTER_NS = "dataimporter"; - - public static final String IMPORTER_NS_SHORT = "dih"; - - public static final String ROOT_ENTITY = "rootEntity"; - - public static final String FUNCTION = "function"; - - public static final String CLASS = "class"; - - public static final String DATA_SRC = "dataSource"; - - private static final Set RESERVED_WORDS = new HashSet(); - static{ - RESERVED_WORDS.add(IMPORTER_NS); - RESERVED_WORDS.add(IMPORTER_NS_SHORT); - RESERVED_WORDS.add("request"); - RESERVED_WORDS.add("delta"); - RESERVED_WORDS.add("functions"); - RESERVED_WORDS.add("session"); - RESERVED_WORDS.add(SolrWriter.LAST_INDEX_KEY); - } - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,578 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.handler.dataimport; - -import org.apache.solr.core.SolrConfig; -import org.apache.solr.core.SolrCore; -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.schema.SchemaField; -import org.apache.solr.common.util.ContentStream; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.xml.sax.InputSource; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import java.io.StringReader; -import java.text.SimpleDateFormat; -import java.util.*; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReentrantLock; - -/** - *
- * Stores all configuration information for pulling and indexing data.
- *
- * This API is experimental and subject to change - * - * @version $Id: DataImporter.java 826074 2009-10-16 20:34:16Z shalin $ - * @since solr 1.3 - */ -public class DataImporter { - - public enum Status { - IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED - } - - private static final Logger LOG = LoggerFactory.getLogger(DataImporter.class); - - private Status status = Status.IDLE; - - private DataConfig config; - - private Date lastIndexTime; - - private Date indexStartTime; - - private Properties store = new Properties(); - - private Map dataSourceProps = new HashMap(); - - private IndexSchema schema; - - public DocBuilder docBuilder; - - public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics(); - - public Map evaluators; - - private SolrCore core; - - private ReentrantLock importLock = new ReentrantLock(); - - private final Map coreScopeSession; - - /** - * Only for testing purposes - */ - DataImporter() { - coreScopeSession = new HashMap(); - } - - DataImporter(String dataConfig, SolrCore core, Map ds, Map session) { - if (dataConfig == null) - throw new DataImportHandlerException(SEVERE, - "Configuration not found"); - this.core = core; - this.schema = core.getSchema(); - dataSourceProps = ds; - if (session == null) - session = new HashMap(); - coreScopeSession = session; - loadDataConfig(dataConfig); - - for (Map.Entry entry : schema.getFields().entrySet()) { - config.lowerNameVsSchemaField.put(entry.getKey().toLowerCase(), entry.getValue()); - } - - for (DataConfig.Entity e : config.document.entities) { - Map fields = new HashMap(); - initEntity(e, fields, false); - verifyWithSchema(fields); - identifyPk(e); - } - } - - private void verifyWithSchema(Map fields) { - Map schemaFields = schema.getFields(); - for (Map.Entry entry : schemaFields.entrySet()) { - SchemaField sf = entry.getValue(); - if (!fields.containsKey(sf.getName())) { - if (sf.isRequired()) { - LOG - .info(sf.getName() - + " is a required field in SolrSchema . But not found in DataConfig"); - } - } - } - for (Map.Entry entry : fields.entrySet()) { - DataConfig.Field fld = entry.getValue(); - SchemaField field = schema.getFieldOrNull(fld.getName()); - if (field == null) { - field = config.lowerNameVsSchemaField.get(fld.getName().toLowerCase()); - if (field == null) { - LOG.info("The field :" + fld.getName() + " present in DataConfig does not have a counterpart in Solr Schema"); - } - } - } - - } - - /** - * Used by tests - */ - void loadAndInit(String configStr) { - loadDataConfig(configStr); - Map fields = new HashMap(); - for (DataConfig.Entity entity : config.document.entities) { - initEntity(entity, fields, false); - } - } - - private void identifyPk(DataConfig.Entity entity) { - String schemaPk = schema.getUniqueKeyField().getName(); - //if no fields are mentioned . 
solr uniqeKey is same as dih 'pk' - entity.pkMappingFromSchema = schemaPk; - for (DataConfig.Field field : entity.fields) { - if(field.getName().equals(schemaPk)) { - entity.pkMappingFromSchema = field.column; - //get the corresponding column mapping for the solr uniqueKey - // But if there are multiple columns mapping to the solr uniqueKey, it will fail - // so , in one off cases we may need pk - break; - } - } - - } - - private void loadDataConfig(String configFile) { - - try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - try { - dbf.setXIncludeAware(true); - dbf.setNamespaceAware(true); - } catch( UnsupportedOperationException e ) { - LOG.warn( "XML parser doesn't support XInclude option" ); - } - DocumentBuilder builder = dbf.newDocumentBuilder(); - Document document = builder.parse(new InputSource(new StringReader( - configFile))); - - config = new DataConfig(); - NodeList elems = document.getElementsByTagName("dataConfig"); - if(elems == null || elems.getLength() == 0) { - throw new DataImportHandlerException(SEVERE, "the root node '' is missing"); - } - config.readFromXml((Element) elems.item(0)); - LOG.info("Data Configuration loaded successfully"); - } catch (Exception e) { - SolrConfig.severeErrors.add(e); - throw new DataImportHandlerException(SEVERE, - "Exception occurred while initializing context", e); - } - } - - private void initEntity(DataConfig.Entity e, - Map fields, boolean docRootFound) { - if (e.pk != null) - e.primaryKeys = e.pk.split(","); - e.allAttributes.put(DATA_SRC, e.dataSource); - - if (!docRootFound && !"false".equals(e.docRoot)) { - // if in this chain no document root is found() - e.isDocRoot = true; - } - - if (e.fields != null) { - for (DataConfig.Field f : e.fields) { - if (schema != null) { - if(f.name != null && f.name.contains("${")){ - f.dynamicName = true; - continue; - } - SchemaField schemaField = schema.getFieldOrNull(f.getName()); - if (schemaField == null) { - schemaField = config.lowerNameVsSchemaField.get(f.getName().toLowerCase()); - if (schemaField != null) f.name = schemaField.getName(); - } - if (schemaField != null) { - f.multiValued = schemaField.multiValued(); - f.allAttributes.put(MULTI_VALUED, Boolean.toString(schemaField - .multiValued())); - f.allAttributes.put(TYPE, schemaField.getType().getTypeName()); - f.allAttributes.put("indexed", Boolean.toString(schemaField.indexed())); - f.allAttributes.put("stored", Boolean.toString(schemaField.stored())); - f.allAttributes.put("defaultValue", schemaField.getDefaultValue()); - } else { - f.toWrite = false; - } - } - fields.put(f.getName(), f); - f.entity = e; - f.allAttributes.put("boost", f.boost.toString()); - f.allAttributes.put("toWrite", Boolean.toString(f.toWrite)); - e.allFieldsList.add(Collections.unmodifiableMap(f.allAttributes)); - } - } - e.allFieldsList = Collections.unmodifiableList(e.allFieldsList); - e.allAttributes = Collections.unmodifiableMap(e.allAttributes); - - if (e.entities == null) - return; - for (DataConfig.Entity e1 : e.entities) { - e1.parentEntity = e; - initEntity(e1, fields, e.isDocRoot || docRootFound); - } - - } - - DataConfig getConfig() { - return config; - } - - Date getIndexStartTime() { - return indexStartTime; - } - - void setIndexStartTime(Date indextStartTime) { - this.indexStartTime = indextStartTime; - } - - void store(Object key, Object value) { - store.put(key, value); - } - - Object retrieve(Object key) { - return store.get(key); - } - - DataSource getDataSourceInstance(DataConfig.Entity key, String name, Context 
ctx) { - if ("null".equals(name)) return new MockDataSource(); - Properties p = dataSourceProps.get(name); - if (p == null) - p = config.dataSources.get(name); - if (p == null) - p = dataSourceProps.get(null);// for default data source - if (p == null) - p = config.dataSources.get(null); - if (p == null) - throw new DataImportHandlerException(SEVERE, - "No dataSource :" + name + " available for entity :" - + key.name); - String impl = p.getProperty(TYPE); - DataSource dataSrc = null; - if (impl == null) { - dataSrc = new JdbcDataSource(); - } else { - try { - dataSrc = (DataSource) DocBuilder.loadClass(impl, getCore()).newInstance(); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Invalid type for data source: " + impl); - } - } - try { - Properties copyProps = new Properties(); - copyProps.putAll(p); - Map map = ctx.getRequestParameters(); - if (map.containsKey("rows")) { - int rows = Integer.parseInt((String) map.get("rows")); - if (map.containsKey("start")) { - rows += Integer.parseInt((String) map.get("start")); - } - copyProps.setProperty("maxRows", String.valueOf(rows)); - } - dataSrc.init(ctx, copyProps); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Failed to initialize DataSource: " + key.dataSource); - } - return dataSrc; - } - - public Status getStatus() { - return status; - } - - public void setStatus(Status status) { - this.status = status; - } - - public boolean isBusy() { - return importLock.isLocked(); - } - - public void doFullImport(SolrWriter writer, RequestParams requestParams) { - LOG.info("Starting Full Import"); - setStatus(Status.RUNNING_FULL_DUMP); - - setIndexStartTime(new Date()); - - try { - docBuilder = new DocBuilder(this, writer, requestParams); - docBuilder.execute(); - if (!requestParams.debug) - cumulativeStatistics.add(docBuilder.importStatistics); - } catch (Throwable t) { - LOG.error("Full Import failed", t); - docBuilder.rollback(); - } finally { - setStatus(Status.IDLE); - config.clearCaches(); - DocBuilder.INSTANCE.set(null); - } - - } - - public void doDeltaImport(SolrWriter writer, RequestParams requestParams) { - LOG.info("Starting Delta Import"); - setStatus(Status.RUNNING_DELTA_DUMP); - - try { - setIndexStartTime(new Date()); - docBuilder = new DocBuilder(this, writer, requestParams); - docBuilder.execute(); - if (!requestParams.debug) - cumulativeStatistics.add(docBuilder.importStatistics); - } catch (Throwable t) { - LOG.error("Delta Import Failed", t); - docBuilder.rollback(); - } finally { - setStatus(Status.IDLE); - config.clearCaches(); - DocBuilder.INSTANCE.set(null); - } - - } - - public void runAsync(final RequestParams reqParams, final SolrWriter sw) { - new Thread() { - @Override - public void run() { - runCmd(reqParams, sw); - } - }.start(); - } - - void runCmd(RequestParams reqParams, SolrWriter sw) { - String command = reqParams.command; - if (command.equals(ABORT_CMD)) { - if (docBuilder != null) { - docBuilder.abort(); - } - return; - } - if (!importLock.tryLock()){ - LOG.warn("Import command failed . another import is running"); - return; - } - try { - if (FULL_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) { - doFullImport(sw, reqParams); - } else if (command.equals(DELTA_IMPORT_CMD)) { - doDeltaImport(sw, reqParams); - } - } finally { - importLock.unlock(); - } - } - - @SuppressWarnings("unchecked") - Map getStatusMessages() { - //this map object is a Collections.synchronizedMap(new LinkedHashMap()). 
if we - // synchronize on the object it must be safe to iterate through the map - Map statusMessages = (Map) retrieve(STATUS_MSGS); - Map result = new LinkedHashMap(); - if (statusMessages != null) { - synchronized (statusMessages) { - for (Object o : statusMessages.entrySet()) { - Map.Entry e = (Map.Entry) o; - //the toString is taken because some of the Objects create the data lazily when toString() is called - result.put((String) e.getKey(), e.getValue().toString()); - } - } - } - return result; - - } - - DocBuilder getDocBuilder() { - return docBuilder; - } - - static final ThreadLocal QUERY_COUNT = new ThreadLocal() { - protected AtomicLong initialValue() { - return new AtomicLong(); - } - }; - - static final ThreadLocal DATE_TIME_FORMAT = new ThreadLocal() { - @Override - protected SimpleDateFormat initialValue() { - return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - } - }; - - static final class MSG { - public static final String NO_CONFIG_FOUND = "Configuration not found"; - - public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run"; - - public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid"; - - public static final String LOAD_EXP = "Exception while loading DataImporter"; - - public static final String JMX_DESC = "Manage data import from databases to Solr"; - - public static final String CMD_RUNNING = "A command is still running..."; - - public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag true in solrconfig.xml"; - - public static final String CONFIG_RELOADED = "Configuration Re-loaded sucessfully"; - - public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed"; - - public static final String TOTAL_FAILED_DOCS = "Total Documents Failed"; - - public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource"; - - public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched"; - - public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted"; - - public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped"; - } - - static final class RequestParams { - public String command = null; - - public boolean debug = false; - - public boolean verbose = false; - - public boolean commit = true; - - public boolean optimize = true; - - public int start = 0; - - public long rows = Integer.MAX_VALUE; - - public boolean clean = true; - - public List entities; - - public Map requestParams; - - public String dataConfig; - - public ContentStream contentStream; - - public RequestParams() { - } - - public RequestParams(Map requestParams) { - if (requestParams.containsKey("command")) - command = (String) requestParams.get("command"); - - if ("on".equals(requestParams.get("debug"))) { - debug = true; - rows = 10; - // Set default values suitable for debug mode - commit = false; - clean = false; - verbose = "true".equals(requestParams.get("verbose")) - || "on".equals(requestParams.get("verbose")); - } - if (DELTA_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) { - clean = false; - } - if (requestParams.containsKey("commit")) - commit = Boolean.parseBoolean((String) requestParams.get("commit")); - if (requestParams.containsKey("start")) - start = Integer.parseInt((String) requestParams.get("start")); - if (requestParams.containsKey("rows")) - rows = Integer.parseInt((String) requestParams.get("rows")); - if (requestParams.containsKey("clean")) - clean = Boolean.parseBoolean((String) 
requestParams.get("clean")); - if (requestParams.containsKey("optimize")) { - optimize = Boolean.parseBoolean((String) requestParams.get("optimize")); - if (optimize) - commit = true; - } - - Object o = requestParams.get("entity"); - - if (o != null && o instanceof String) { - entities = new ArrayList(); - entities.add((String) o); - } else if (o != null && o instanceof List) { - entities = (List) requestParams.get("entity"); - } - - dataConfig = (String) requestParams.get("dataConfig"); - if (dataConfig != null && dataConfig.trim().length() == 0) { - // Empty data-config param is not valid, change it to null - dataConfig = null; - } - - this.requestParams = requestParams; - } - } - - IndexSchema getSchema() { - return schema; - } - - Map getCoreScopeSession() { - return coreScopeSession; - } - - SolrCore getCore() { - return core; - } - - public static final String COLUMN = "column"; - - public static final String TYPE = "type"; - - public static final String DATA_SRC = "dataSource"; - - public static final String MULTI_VALUED = "multiValued"; - - public static final String NAME = "name"; - - public static final String STATUS_MSGS = "status-messages"; - - public static final String FULL_IMPORT_CMD = "full-import"; - - public static final String IMPORT_CMD = "import"; - - public static final String DELTA_IMPORT_CMD = "delta-import"; - - public static final String ABORT_CMD = "abort"; - - public static final String DEBUG_MODE = "debug"; - - public static final String RELOAD_CONF_CMD = "reload-config"; - - public static final String SHOW_CONF_CMD = "show-config"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.handler.dataimport; - -/** - *
- * Exception class for all DataImportHandler exceptions
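A hedged sketch of the intended use of the error codes defined below; the field name and the decoding step are invented. SKIP abandons the current document while SEVERE aborts the import, and wrapAndThrow rethrows an existing DataImportHandlerException unwrapped.

    import java.io.UnsupportedEncodingException;
    import java.util.Map;

    import org.apache.solr.handler.dataimport.Context;
    import org.apache.solr.handler.dataimport.DataImportHandlerException;
    import org.apache.solr.handler.dataimport.Transformer;

    public class StrictTransformer extends Transformer {
        public Object transformRow(Map<String, Object> row, Context context) {
            Object raw = row.get("payload");
            if (raw == null)
                // skip just this document; the import keeps running
                throw new DataImportHandlerException(
                    DataImportHandlerException.SKIP, "row has no payload");
            try {
                row.put("payload", new String((byte[]) raw, "UTF-8"));
            } catch (UnsupportedEncodingException e) {
                // abort the whole import through the standard wrapper
                DataImportHandlerException.wrapAndThrow(
                    DataImportHandlerException.SEVERE, e, "cannot decode payload");
            }
            return row;
        }
    }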
- *
- * This API is experimental and subject to change - *
- * $Id: DataImportHandlerException.java 764379 2009-04-13 09:00:59Z shalin $ - * - * @since solr 1.3 - */ -public class DataImportHandlerException extends RuntimeException { - private int errCode; - - public boolean debugged = false; - - public static final int SEVERE = 500, WARN = 400, SKIP = 300, SKIP_ROW =301; - - public DataImportHandlerException(int err) { - super(); - errCode = err; - } - - public DataImportHandlerException(int err, String message) { - super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount())); - errCode = err; - } - - public DataImportHandlerException(int err, String message, Throwable cause) { - super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()), cause); - errCode = err; - } - - public DataImportHandlerException(int err, Throwable cause) { - super(cause); - errCode = err; - } - - public int getErrCode() { - return errCode; - } - - public static void wrapAndThrow(int err, Exception e) { - if (e instanceof DataImportHandlerException) { - throw (DataImportHandlerException) e; - } else { - throw new DataImportHandlerException(err, e); - } - } - - public static void wrapAndThrow(int err, Exception e, String msg) { - if (e instanceof DataImportHandlerException) { - throw (DataImportHandlerException) e; - } else { - throw new DataImportHandlerException(err, msg, e); - } - } - - - public static final String MSG = " Processing Document # "; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,360 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.params.UpdateParams; -import org.apache.solr.common.util.ContentStreamBase; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.core.SolrConfig; -import org.apache.solr.core.SolrCore; -import org.apache.solr.core.SolrResourceLoader; -import org.apache.solr.handler.RequestHandlerBase; -import org.apache.solr.handler.RequestHandlerUtils; -import org.apache.solr.request.RawResponseWriter; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.request.SolrQueryResponse; -import org.apache.solr.request.SolrRequestHandler; -import org.apache.solr.update.processor.UpdateRequestProcessor; -import org.apache.solr.update.processor.UpdateRequestProcessorChain; -import org.apache.solr.util.plugin.SolrCoreAware; - -import java.util.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - *
- * Solr Request Handler for data import from databases and REST data sources. - *
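A hedged sketch of the HTTP surface, assuming a default local install; the commands and parameters are the ones parsed by DataImporter.RequestParams earlier in this diff (full-import defaults clean=true, delta-import defaults clean=false).

    import java.io.InputStream;
    import java.net.URL;

    public class DataImportCommands {
        public static void main(String[] args) throws Exception {
            String base = "http://localhost:8983/solr/dataimport";
            fire(base + "?command=full-import");              // rebuild, cleaning the index first
            fire(base + "?command=delta-import&commit=true"); // incremental update
            fire(base + "?command=abort");                    // stop a running import
        }

        private static void fire(String url) throws Exception {
            InputStream in = new URL(url).openStream();
            try {
                while (in.read() != -1) { /* drain the XML status response */ }
            } finally {
                in.close();
            }
        }
    }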
- * It is configured in solrconfig.xml - *
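A hedged sketch of that registration; the handler name and config file name are conventional rather than fixed. The inform() method below reads the "config" default to locate the data-config file, and processConfiguration() folds in any "datasource" defaults.

    public class HandlerRegistrationExample {
        // Sketch only: this fragment belongs in solrconfig.xml.
        static final String SOLRCONFIG_FRAGMENT =
            "<requestHandler name=\"/dataimport\"\n"
          + "    class=\"org.apache.solr.handler.dataimport.DataImportHandler\">\n"
          + "  <lst name=\"defaults\">\n"
          + "    <str name=\"config\">data-config.xml</str>\n"
          + "  </lst>\n"
          + "</requestHandler>";
    }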
- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *
- * This API is experimental and subject to change - * - * @version $Id: DataImportHandler.java 788580 2009-06-26 05:20:23Z noble $ - * @since solr 1.3 - */ -public class DataImportHandler extends RequestHandlerBase implements - SolrCoreAware { - - private static final Logger LOG = LoggerFactory.getLogger(DataImportHandler.class); - - private DataImporter importer; - - private Map dataSources = new HashMap(); - - private List debugDocuments; - - private boolean debugEnabled = true; - - private String myName = "dataimport"; - - private Map coreScopeSession = new HashMap(); - - @Override - @SuppressWarnings("unchecked") - public void init(NamedList args) { - super.init(args); - } - - @SuppressWarnings("unchecked") - public void inform(SolrCore core) { - try { - //hack to get the name of this handler - for (Map.Entry e : core.getRequestHandlers().entrySet()) { - SolrRequestHandler handler = e.getValue(); - //this will not work if startup=lazy is set - if( this == handler) { - String name= e.getKey(); - if(name.startsWith("/")){ - myName = name.substring(1); - } - // some users may have '/' in the handler name. replace with '_' - myName = myName.replaceAll("/","_") ; - } - } - String debug = (String) initArgs.get(ENABLE_DEBUG); - if (debug != null && "no".equals(debug)) - debugEnabled = false; - NamedList defaults = (NamedList) initArgs.get("defaults"); - if (defaults != null) { - String configLoc = (String) defaults.get("config"); - if (configLoc != null && configLoc.length() != 0) { - processConfiguration(defaults); - - importer = new DataImporter(SolrWriter.getResourceAsString(core - .getResourceLoader().openResource(configLoc)), core, - dataSources, coreScopeSession); - } - } - } catch (Throwable e) { - SolrConfig.severeErrors.add(e); - LOG.error( DataImporter.MSG.LOAD_EXP, e); - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - DataImporter.MSG.INVALID_CONFIG, e); - } - } - - @Override - @SuppressWarnings("unchecked") - public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) - throws Exception { - rsp.setHttpCaching(false); - SolrParams params = req.getParams(); - DataImporter.RequestParams requestParams = new DataImporter.RequestParams(getParamsMap(params)); - String command = requestParams.command; - Iterable streams = req.getContentStreams(); - if(streams != null){ - for (ContentStream stream : streams) { - requestParams.contentStream = stream; - break; - } - } - if (DataImporter.SHOW_CONF_CMD.equals(command)) { - // Modify incoming request params to add wt=raw - ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams()); - rawParams.set(CommonParams.WT, "raw"); - req.setParams(rawParams); - String dataConfigFile = defaults.get("config"); - ContentStreamBase content = new ContentStreamBase.StringStream(SolrWriter - .getResourceAsString(req.getCore().getResourceLoader().openResource( - dataConfigFile))); - rsp.add(RawResponseWriter.CONTENT, content); - return; - } - - rsp.add("initArgs", initArgs); - String message = ""; - - if (command != null) - rsp.add("command", command); - - if (requestParams.debug && (importer == null || !importer.isBusy())) { - // Reload the data-config.xml - importer = null; - if (requestParams.dataConfig != null) { - try { - processConfiguration((NamedList) initArgs.get("defaults")); - importer = new DataImporter(requestParams.dataConfig, req.getCore() - , dataSources, coreScopeSession); - } catch (RuntimeException e) { - rsp.add("exception", DebugLogger.getStacktraceString(e)); - importer = null; - return; - } - 
} else { - inform(req.getCore()); - } - message = DataImporter.MSG.CONFIG_RELOADED; - } - - // If importer is still null - if (importer == null) { - rsp.add("status", DataImporter.MSG.NO_INIT); - return; - } - - if (command != null && DataImporter.ABORT_CMD.equals(command)) { - importer.runCmd(requestParams, null); - } else if (importer.isBusy()) { - message = DataImporter.MSG.CMD_RUNNING; - } else if (command != null) { - if (DataImporter.FULL_IMPORT_CMD.equals(command) - || DataImporter.DELTA_IMPORT_CMD.equals(command) || - IMPORT_CMD.equals(command)) { - - UpdateRequestProcessorChain processorChain = - req.getCore().getUpdateProcessingChain(params.get(UpdateParams.UPDATE_PROCESSOR)); - UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp); - SolrResourceLoader loader = req.getCore().getResourceLoader(); - SolrWriter sw = getSolrWriter(processor, loader, requestParams); - - if (requestParams.debug) { - if (debugEnabled) { - // Synchronous request for the debug mode - importer.runCmd(requestParams, sw); - rsp.add("mode", "debug"); - rsp.add("documents", debugDocuments); - if (sw.debugLogger != null) - rsp.add("verbose-output", sw.debugLogger.output); - debugDocuments = null; - } else { - message = DataImporter.MSG.DEBUG_NOT_ENABLED; - } - } else { - // Asynchronous request for normal mode - if(requestParams.contentStream == null){ - importer.runAsync(requestParams, sw); - } else { - importer.runCmd(requestParams, sw); - } - } - } else if (DataImporter.RELOAD_CONF_CMD.equals(command)) { - importer = null; - inform(req.getCore()); - message = DataImporter.MSG.CONFIG_RELOADED; - } - } - rsp.add("status", importer.isBusy() ? "busy" : "idle"); - rsp.add("importResponse", message); - rsp.add("statusMessages", importer.getStatusMessages()); - - RequestHandlerUtils.addExperimentalFormatWarning(rsp); - } - - private Map getParamsMap(SolrParams params) { - Iterator names = params.getParameterNamesIterator(); - Map result = new HashMap(); - while (names.hasNext()) { - String s = names.next(); - String[] val = params.getParams(s); - if (val == null || val.length < 1) - continue; - if (val.length == 1) - result.put(s, val[0]); - else - result.put(s, Arrays.asList(val)); - } - return result; - } - - @SuppressWarnings("unchecked") - private void processConfiguration(NamedList defaults) { - if (defaults == null) { - LOG.info("No configuration specified in solrconfig.xml for DataImportHandler"); - return; - } - - LOG.info("Processing configuration from solrconfig.xml: " + defaults); - - dataSources = new HashMap(); - - int position = 0; - - while (position < defaults.size()) { - if (defaults.getName(position) == null) - break; - - String name = defaults.getName(position); - if (name.equals("datasource")) { - NamedList dsConfig = (NamedList) defaults.getVal(position); - Properties props = new Properties(); - for (int i = 0; i < dsConfig.size(); i++) - props.put(dsConfig.getName(i), dsConfig.getVal(i)); - LOG.info("Adding properties to datasource: " + props); - dataSources.put((String) dsConfig.get("name"), props); - } - position++; - } - } - - private SolrWriter getSolrWriter(final UpdateRequestProcessor processor, - final SolrResourceLoader loader, final DataImporter.RequestParams requestParams) { - - return new SolrWriter(processor, loader.getConfigDir(), myName) { - - @Override - public boolean upload(SolrInputDocument document) { - try { - if (requestParams.debug) { - if (debugDocuments == null) - debugDocuments = new ArrayList(); - debugDocuments.add(document); - } - return 
super.upload(document); - } catch (RuntimeException e) { - LOG.error( "Exception while adding: " + document, e); - return false; - } - } - }; - } - - @Override - @SuppressWarnings("unchecked") - public NamedList getStatistics() { - if (importer == null) - return super.getStatistics(); - - DocBuilder.Statistics cumulative = importer.cumulativeStatistics; - NamedList result = new NamedList(); - - result.add("Status", importer.getStatus().toString()); - - if (importer.docBuilder != null) { - DocBuilder.Statistics running = importer.docBuilder.importStatistics; - result.add("Documents Processed", running.docCount); - result.add("Requests made to DataSource", running.queryCount); - result.add("Rows Fetched", running.rowsCount); - result.add("Documents Deleted", running.deletedDocCount); - result.add("Documents Skipped", running.skipDocCount); - } - - result.add(DataImporter.MSG.TOTAL_DOC_PROCESSED, cumulative.docCount); - result.add(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, cumulative.queryCount); - result.add(DataImporter.MSG.TOTAL_ROWS_EXECUTED, cumulative.rowsCount); - result.add(DataImporter.MSG.TOTAL_DOCS_DELETED, cumulative.deletedDocCount); - result.add(DataImporter.MSG.TOTAL_DOCS_SKIPPED, cumulative.skipDocCount); - - NamedList requestStatistics = super.getStatistics(); - if (requestStatistics != null) { - for (int i = 0; i < requestStatistics.size(); i++) { - result.add(requestStatistics.getName(i), requestStatistics.getVal(i)); - } - } - - return result; - } - - // //////////////////////SolrInfoMBeans methods ////////////////////// - - @Override - public String getDescription() { - return DataImporter.MSG.JMX_DESC; - } - - @Override - public String getSourceId() { - return "$Id: DataImportHandler.java 788580 2009-06-26 05:20:23Z noble $"; - } - - @Override - public String getVersion() { - return "1.0"; - } - - @Override - public String getSource() { - return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.4/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java $"; - } - - public static final String ENABLE_DEBUG = "enableDebug"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataSource.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,72 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-package org.apache.solr.handler.dataimport;
-
-import java.util.Properties;
-
-/**
- * <p>
- * Provides data from a source with a given query.
- * </p>
- * <p/>
- * <p>
- * Implementations of this abstract class must provide a default no-arg constructor.
- * </p>
- * <p/>
- * <p>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * </p>
- * <p/>
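- * <p>
- * For illustration, a minimal custom source might look like the sketch below
- * (hypothetical class, not part of this contrib; assumes java.util imports and
- * the same package, and nothing beyond the init/getData/close contract):
- * </p>
- * <pre>
- * public class InMemoryDataSource extends DataSource<Iterator<Map<String, Object>>> {
- *   private List<Map<String, Object>> rows;
- *
- *   public void init(Context context, Properties initProps) {
- *     rows = new ArrayList<Map<String, Object>>();   // acquire resources here
- *     Map<String, Object> row = new HashMap<String, Object>();
- *     row.put("id", "1");
- *     rows.add(row);
- *   }
- *
- *   public Iterator<Map<String, Object>> getData(String query) {
- *     return rows.iterator();   // a real source would honor the query string
- *   }
- *
- *   public void close() {
- *     rows = null;              // release whatever init() acquired
- *   }
- * }
- * </pre>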
- * This API is experimental and may change in the future.
- *
- * @version $Id: DataSource.java 684025 2008-08-08 17:50:11Z shalin $
- * @since solr 1.3
- */
-public abstract class DataSource<T> {
-
- /**
- * Initializes the DataSource with the Context and
- * initialization properties.
- * <p/>
- * This is invoked by the DataImporter after creating an
- * instance of this class.
- *
- * @param context the current context
- * @param initProps initialization properties for this data source
- */
- public abstract void init(Context context, Properties initProps);
-
- /**
- * Get records for the given query. The return type depends on the
- * implementation.
- *
- * @param query The query string. It can be SQL for JdbcDataSource, a URL
- * for HttpDataSource, a file location for FileDataSource, or a custom
- * format for your own custom DataSource.
- * @return Depends on the implementation. For instance JdbcDataSource returns
- * an Iterator<Map<String, Object>>
- */
- public abstract T getData(String query);
-
- /**
- * Cleans up resources of this DataSource after use.
- */
- public abstract void close();
-}
diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java
--- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java 2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java 1970-01-01 00:00:00.000000000 +0000
@@ -1,93 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.handler.dataimport;
-
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.*;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * <p>

- * Transformer instance which creates Date instances out of Strings.
- * </p>
- * <p/>
- * <p>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * </p>
- * <p/>
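- * <p>
- * For example, declaring a field as <field column="lastModified"
- * dateTimeFormat="yyyy-MM-dd HH:mm:ss"/> in data-config.xml makes this
- * transformer replace the raw string with a java.util.Date. The parsing step is
- * plain SimpleDateFormat (sketch; imports from java.text and java.util assumed):
- * </p>
- * <pre>
- * SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- * Date d = fmt.parse("2009-11-04 23:05:48");   // throws ParseException on bad input
- * </pre>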
- * This API is experimental and subject to change - * - * @version $Id: DateFormatTransformer.java 762180 2009-04-05 22:50:10Z shalin $ - * @since solr 1.3 - */ -public class DateFormatTransformer extends Transformer { - private Map fmtCache = new HashMap(); - private static final Logger LOG = LoggerFactory - .getLogger(DateFormatTransformer.class); - - @SuppressWarnings("unchecked") - public Object transformRow(Map aRow, Context context) { - for (Map map : context.getAllEntityFields()) { - String fmt = map.get(DATE_TIME_FMT); - if (fmt == null) - continue; - String column = map.get(DataImporter.COLUMN); - String srcCol = map.get(RegexTransformer.SRC_COL_NAME); - if (srcCol == null) - srcCol = column; - try { - Object o = aRow.get(srcCol); - if (o instanceof List) { - List inputs = (List) o; - List results = new ArrayList(); - for (Object input : inputs) { - results.add(process(input, fmt)); - } - aRow.put(column, results); - } else { - if (o != null) { - aRow.put(column, process(o, fmt)); - } - } - } catch (ParseException e) { - LOG.warn("Could not parse a Date field ", e); - } - } - return aRow; - } - - private Date process(Object value, String format) throws ParseException { - if (value == null) return null; - String strVal = value.toString().trim(); - if (strVal.length() == 0) - return null; - SimpleDateFormat fmt = fmtCache.get(format); - if (fmt == null) { - fmt = new SimpleDateFormat(format); - fmtCache.put(format, fmt); - } - return fmt.parse(strVal); - } - - public static final String DATE_TIME_FMT = "dateTimeFormat"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,281 +0,0 @@ -package org.apache.solr.handler.dataimport; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.solr.common.util.NamedList; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.text.MessageFormat; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Stack; - -/** - *

- * <p>
- * Implements most of the interactive development functionality
- * </p>
- * <p/>
- * <p>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * </p>
- * <p/>
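- * <p>
- * wrapDs() and wrapTransformer() below are plain decorators: they log an event,
- * delegate to the wrapped instance, and log the outcome. Stripped of the error
- * handling, the transformer wrapper has this shape (a sketch of the code below):
- * </p>
- * <pre>
- * Transformer wrapTransformer(final Transformer t) {
- *   return new Transformer() {
- *     public Object transformRow(Map<String, Object> row, Context context) {
- *       writer.log(SolrWriter.PRE_TRANSFORMER_ROW, null, row);   // row as it came in
- *       Object result = t.transformRow(row, context);
- *       writer.log(SolrWriter.TRANSFORMED_ROW, getTransformerName(t), result);
- *       return result;
- *     }
- *   };
- * }
- * </pre>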
- * This API is experimental and subject to change - * - * @version $Id: DebugLogger.java 819262 2009-09-27 06:27:28Z shalin $ - * @since solr 1.3 - */ -class DebugLogger { - private Stack debugStack; - - NamedList output; - private final SolrWriter writer; - - private static final String LINE = "---------------------------------------------"; - - private MessageFormat fmt = new MessageFormat( - "----------- row #{0}-------------"); - - boolean enabled = true; - - public DebugLogger(SolrWriter solrWriter) { - writer = solrWriter; - output = new NamedList(); - debugStack = new Stack() { - - public DebugInfo pop() { - if (size() == 1) - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, "Stack is becoming empty"); - return super.pop(); - } - }; - debugStack.push(new DebugInfo(null, -1, null)); - output = debugStack.peek().lst; - } - - private DebugInfo peekStack() { - return debugStack.isEmpty() ? null : debugStack.peek(); - } - - public void log(int event, String name, Object row) { - if (event == SolrWriter.DISABLE_LOGGING) { - enabled = false; - return; - } else if (event == SolrWriter.ENABLE_LOGGING) { - enabled = true; - return; - } - - if (!enabled && event != SolrWriter.START_ENTITY - && event != SolrWriter.END_ENTITY) { - return; - } - - if (event == SolrWriter.START_DOC) { - debugStack.push(new DebugInfo(null, SolrWriter.START_DOC, peekStack())); - } else if (SolrWriter.START_ENTITY == event) { - debugStack - .push(new DebugInfo(name, SolrWriter.START_ENTITY, peekStack())); - } else if (SolrWriter.ENTITY_OUT == event - || SolrWriter.PRE_TRANSFORMER_ROW == event) { - if (debugStack.peek().type == SolrWriter.START_ENTITY - || debugStack.peek().type == SolrWriter.START_DOC) { - debugStack.peek().lst.add(null, fmt.format(new Object[]{++debugStack - .peek().rowCount})); - addToNamedList(debugStack.peek().lst, row); - debugStack.peek().lst.add(null, LINE); - } - } else if (event == SolrWriter.ROW_END) { - popAllTransformers(); - } else if (SolrWriter.END_ENTITY == event) { - while (debugStack.pop().type != SolrWriter.START_ENTITY) - ; - } else if (SolrWriter.END_DOC == event) { - while (debugStack.pop().type != SolrWriter.START_DOC) - ; - } else if (event == SolrWriter.TRANSFORMER_EXCEPTION) { - debugStack.push(new DebugInfo(name, event, peekStack())); - debugStack.peek().lst.add("EXCEPTION", - getStacktraceString((Exception) row)); - } else if (SolrWriter.TRANSFORMED_ROW == event) { - debugStack.push(new DebugInfo(name, event, peekStack())); - debugStack.peek().lst.add(null, LINE); - addToNamedList(debugStack.peek().lst, row); - debugStack.peek().lst.add(null, LINE); - if (row instanceof DataImportHandlerException) { - DataImportHandlerException dataImportHandlerException = (DataImportHandlerException) row; - dataImportHandlerException.debugged = true; - } - } else if (SolrWriter.ENTITY_META == event) { - popAllTransformers(); - debugStack.peek().lst.add(name, row); - } else if (SolrWriter.ENTITY_EXCEPTION == event) { - if (row instanceof DataImportHandlerException) { - DataImportHandlerException dihe = (DataImportHandlerException) row; - if (dihe.debugged) - return; - dihe.debugged = true; - } - - popAllTransformers(); - debugStack.peek().lst.add("EXCEPTION", - getStacktraceString((Exception) row)); - } - } - - private void popAllTransformers() { - while (true) { - int type = debugStack.peek().type; - if (type == SolrWriter.START_DOC || type == SolrWriter.START_ENTITY) - break; - debugStack.pop(); - } - } - - private void addToNamedList(NamedList nl, Object row) 
{ - if (row instanceof List) { - List list = (List) row; - NamedList l = new NamedList(); - nl.add(null, l); - for (Object o : list) { - Map map = (Map) o; - for (Map.Entry entry : map.entrySet()) - nl.add(entry.getKey(), entry.getValue()); - } - } else if (row instanceof Map) { - Map map = (Map) row; - for (Map.Entry entry : map.entrySet()) - nl.add(entry.getKey(), entry.getValue()); - } - } - - DataSource wrapDs(final DataSource ds) { - return new DataSource() { - public void init(Context context, Properties initProps) { - ds.init(context, initProps); - } - - public void close() { - ds.close(); - } - - public Object getData(String query) { - writer.log(SolrWriter.ENTITY_META, "query", query); - long start = System.currentTimeMillis(); - try { - return ds.getData(query); - } catch (DataImportHandlerException de) { - writer.log(SolrWriter.ENTITY_EXCEPTION, - null, de); - throw de; - } catch (Exception e) { - writer.log(SolrWriter.ENTITY_EXCEPTION, - null, e); - DataImportHandlerException de = new DataImportHandlerException( - DataImportHandlerException.SEVERE, "", e); - de.debugged = true; - throw de; - } finally { - writer.log(SolrWriter.ENTITY_META, "time-taken", DocBuilder - .getTimeElapsedSince(start)); - } - } - }; - } - - Transformer wrapTransformer(final Transformer t) { - return new Transformer() { - public Object transformRow(Map row, Context context) { - writer.log(SolrWriter.PRE_TRANSFORMER_ROW, null, row); - String tName = getTransformerName(t); - Object result = null; - try { - result = t.transformRow(row, context); - writer.log(SolrWriter.TRANSFORMED_ROW, tName, result); - } catch (DataImportHandlerException de) { - writer.log(SolrWriter.TRANSFORMER_EXCEPTION, tName, de); - de.debugged = true; - throw de; - } catch (Exception e) { - writer.log(SolrWriter.TRANSFORMER_EXCEPTION, tName, e); - DataImportHandlerException de = new DataImportHandlerException(DataImportHandlerException.SEVERE, "", e); - de.debugged = true; - throw de; - } - return result; - } - }; - } - - public static String getStacktraceString(Exception e) { - StringWriter sw = new StringWriter(); - e.printStackTrace(new PrintWriter(sw)); - return sw.toString(); - } - - static String getTransformerName(Transformer t) { - Class transClass = t.getClass(); - if (t instanceof EntityProcessorWrapper.ReflectionTransformer) { - return ((EntityProcessorWrapper.ReflectionTransformer) t).trans; - } - if (t instanceof ScriptTransformer) { - ScriptTransformer scriptTransformer = (ScriptTransformer) t; - return "script:" + scriptTransformer.getFunctionName(); - } - if (transClass.getPackage().equals(DebugLogger.class.getPackage())) { - return transClass.getSimpleName(); - } else { - return transClass.getName(); - } - } - - private static class DebugInfo { - String name; - - int tCount, rowCount; - - NamedList lst; - - int type; - - DebugInfo parent; - - public DebugInfo(String name, int type, DebugInfo parent) { - this.name = name; - this.type = type; - this.parent = parent; - lst = new NamedList(); - if (parent != null) { - String displayName = null; - if (type == SolrWriter.START_ENTITY) { - displayName = "entity:" + name; - } else if (type == SolrWriter.TRANSFORMED_ROW - || type == SolrWriter.TRANSFORMER_EXCEPTION) { - displayName = "transformer:" + name; - } else if (type == SolrWriter.START_DOC) { - this.name = displayName = "document#" + SolrWriter.getDocCount(); - } - parent.lst.add(displayName, lst); - } - } - } - -} diff -Nru 
solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,791 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.core.SolrCore; -import static org.apache.solr.handler.dataimport.SolrWriter.LAST_INDEX_KEY; -import org.apache.solr.schema.SchemaField; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; -import java.text.ParseException; - -/** - *

- * <p> DocBuilder is responsible for creating Solr documents out of the given configuration. It also maintains
- * statistics information. It depends on the EntityProcessor implementations to fetch data.
- * </p>
- * <p/>
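- * <p>
- * The statistics kept here are plain atomic counters that can be snapshotted at
- * any time; an illustrative sketch (Statistics is the inner class declared near
- * the end of this file):
- * </p>
- * <pre>
- * DocBuilder.Statistics stats = new DocBuilder.Statistics();
- * stats.docCount.incrementAndGet();   // one document written
- * stats.rowsCount.addAndGet(3);       // three source rows consumed
- * Map snapshot = stats.getStatsSnapshot();
- * </pre>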
- * This API is experimental and subject to change - * - * @version $Id: DocBuilder.java 823446 2009-10-09 08:05:37Z noble $ - * @since solr 1.3 - */ -public class DocBuilder { - - private static final Logger LOG = LoggerFactory.getLogger(DocBuilder.class); - - private static final Date EPOCH = new Date(0); - - DataImporter dataImporter; - - private DataConfig.Document document; - - private DataConfig.Entity root; - - @SuppressWarnings("unchecked") - private Map statusMessages = Collections.synchronizedMap(new LinkedHashMap()); - - public Statistics importStatistics = new Statistics(); - - SolrWriter writer; - - DataImporter.RequestParams requestParameters; - - boolean verboseDebug = false; - - private Map session = new HashMap(); - - static final ThreadLocal INSTANCE = new ThreadLocal(); - Map functionsNamespace; - private Properties persistedProperties; - - public DocBuilder(DataImporter dataImporter, SolrWriter writer, DataImporter.RequestParams reqParams) { - INSTANCE.set(this); - this.dataImporter = dataImporter; - this.writer = writer; - DataImporter.QUERY_COUNT.set(importStatistics.queryCount); - requestParameters = reqParams; - verboseDebug = requestParameters.debug && requestParameters.verbose; - functionsNamespace = EvaluatorBag.getFunctionsNamespace(this.dataImporter.getConfig().functions, this); - persistedProperties = writer.readIndexerProperties(); - } - - public VariableResolverImpl getVariableResolver() { - try { - VariableResolverImpl resolver = new VariableResolverImpl(); - Map indexerNamespace = new HashMap(); - if (persistedProperties.getProperty(LAST_INDEX_TIME) != null) { - indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.getProperty(LAST_INDEX_TIME)); - } else { - // set epoch - indexerNamespace.put(LAST_INDEX_TIME, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH)); - } - indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime()); - indexerNamespace.put("request", requestParameters.requestParams); - indexerNamespace.put("functions", functionsNamespace); - for (DataConfig.Entity entity : dataImporter.getConfig().document.entities) { - String key = entity.name + "." 
+ SolrWriter.LAST_INDEX_KEY; - String lastIndex = persistedProperties.getProperty(key); - if (lastIndex != null) { - indexerNamespace.put(key, lastIndex); - } else { - indexerNamespace.put(key, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH)); - } - } - resolver.addNamespace(DataConfig.IMPORTER_NS_SHORT, indexerNamespace); - resolver.addNamespace(DataConfig.IMPORTER_NS, indexerNamespace); - return resolver; - } catch (Exception e) { - DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e); - // unreachable statement - return null; - } - } - - private void invokeEventListener(String className) { - try { - EventListener listener = (EventListener) loadClass(className, dataImporter.getCore()).newInstance(); - notifyListener(listener); - } catch (Exception e) { - DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e, "Unable to load class : " + className); - } - } - - private void notifyListener(EventListener listener) { - String currentProcess; - if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) { - currentProcess = Context.DELTA_DUMP; - } else { - currentProcess = Context.FULL_DUMP; - } - listener.onEvent(new ContextImpl(null, getVariableResolver(), null, currentProcess, session, null, this)); - } - - @SuppressWarnings("unchecked") - public void execute() { - dataImporter.store(DataImporter.STATUS_MSGS, statusMessages); - document = dataImporter.getConfig().document; - final AtomicLong startTime = new AtomicLong(System.currentTimeMillis()); - statusMessages.put(TIME_ELAPSED, new Object() { - public String toString() { - return getTimeElapsedSince(startTime.get()); - } - }); - - statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, - importStatistics.queryCount); - statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, - importStatistics.rowsCount); - statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, - importStatistics.docCount); - statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, - importStatistics.skipDocCount); - - List entities = requestParameters.entities; - - // Trigger onImportStart - if (document.onImportStart != null) { - invokeEventListener(document.onImportStart); - } - AtomicBoolean fullCleanDone = new AtomicBoolean(false); - //we must not do a delete of *:* multiple times if there are multiple root entities to be run - Properties lastIndexTimeProps = new Properties(); - lastIndexTimeProps.setProperty(LAST_INDEX_KEY, - DataImporter.DATE_TIME_FORMAT.get().format(dataImporter.getIndexStartTime())); - for (DataConfig.Entity e : document.entities) { - if (entities != null && !entities.contains(e.name)) - continue; - lastIndexTimeProps.setProperty(e.name + "." 
+ LAST_INDEX_KEY, - DataImporter.DATE_TIME_FORMAT.get().format(new Date())); - root = e; - String delQuery = e.allAttributes.get("preImportDeleteQuery"); - if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) { - cleanByQuery(delQuery, fullCleanDone); - doDelta(); - delQuery = e.allAttributes.get("postImportDeleteQuery"); - if (delQuery != null) { - fullCleanDone.set(false); - cleanByQuery(delQuery, fullCleanDone); - } - } else { - cleanByQuery(delQuery, fullCleanDone); - doFullDump(); - delQuery = e.allAttributes.get("postImportDeleteQuery"); - if (delQuery != null) { - fullCleanDone.set(false); - cleanByQuery(delQuery, fullCleanDone); - } - } - statusMessages.remove(DataImporter.MSG.TOTAL_DOC_PROCESSED); - } - - if (stop.get()) { - // Dont commit if aborted using command=abort - statusMessages.put("Aborted", DataImporter.DATE_TIME_FORMAT.get().format(new Date())); - rollback(); - } else { - // Do not commit unnecessarily if this is a delta-import and no documents were created or deleted - if (!requestParameters.clean) { - if (importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) { - finish(lastIndexTimeProps); - } - } else { - // Finished operation normally, commit now - finish(lastIndexTimeProps); - } - if (document.onImportEnd != null) { - invokeEventListener(document.onImportEnd); - } - } - - statusMessages.remove(TIME_ELAPSED); - statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, ""+ importStatistics.docCount.get()); - if(importStatistics.failedDocCount.get() > 0) - statusMessages.put(DataImporter.MSG.TOTAL_FAILED_DOCS, ""+ importStatistics.failedDocCount.get()); - - statusMessages.put("Time taken ", getTimeElapsedSince(startTime.get())); - LOG.info("Time taken = " + getTimeElapsedSince(startTime.get())); - } - - @SuppressWarnings("unchecked") - private void finish(Properties lastIndexTimeProps) { - LOG.info("Import completed successfully"); - statusMessages.put("", "Indexing completed. Added/Updated: " - + importStatistics.docCount + " documents. Deleted " - + importStatistics.deletedDocCount + " documents."); - if(requestParameters.commit) { - writer.commit(requestParameters.optimize); - addStatusMessage("Committed"); - if (requestParameters.optimize) - addStatusMessage("Optimized"); - } - writer.persist(lastIndexTimeProps); - } - - void rollback() { - writer.rollback(); - statusMessages.put("", "Indexing failed. 
Rolled back all changes."); - addStatusMessage("Rolledback"); - } - - @SuppressWarnings("unchecked") - private void doFullDump() { - addStatusMessage("Full Dump Started"); - buildDocument(getVariableResolver(), null, null, root, true, - null); - } - - @SuppressWarnings("unchecked") - private void doDelta() { - addStatusMessage("Delta Dump started"); - VariableResolverImpl resolver = getVariableResolver(); - - if (document.deleteQuery != null) { - writer.deleteByQuery(document.deleteQuery); - } - - addStatusMessage("Identifying Delta"); - LOG.info("Starting delta collection."); - Set> deletedKeys = new HashSet>(); - Set> allPks = collectDelta(root, resolver, deletedKeys); - if (stop.get()) - return; - addStatusMessage("Deltas Obtained"); - addStatusMessage("Building documents"); - if (!deletedKeys.isEmpty()) { - allPks.removeAll(deletedKeys); - deleteAll(deletedKeys); - // Make sure that documents are not re-created - } - deletedKeys = null; - - statusMessages.put("Total Changed Documents", allPks.size()); - VariableResolverImpl vri = getVariableResolver(); - Iterator> pkIter = allPks.iterator(); - while (pkIter.hasNext()) { - Map map = pkIter.next(); - vri.addNamespace(DataConfig.IMPORTER_NS_SHORT + ".delta", map); - buildDocument(vri, null, map, root, true, null); - pkIter.remove(); - // check for abort - if (stop.get()) - break; - } - - if (!stop.get()) { - LOG.info("Delta Import completed successfully"); - } - } - - private void deleteAll(Set> deletedKeys) { - LOG.info("Deleting stale documents "); - Iterator> iter = deletedKeys.iterator(); - while (iter.hasNext()) { - Map map = iter.next(); - Object key = map.get(root.getPk()); - if(key == null) { - LOG.warn("no key was available for deleteted pk query"); - continue; - } - writer.deleteDoc(key); - importStatistics.deletedDocCount.incrementAndGet(); - iter.remove(); - } - } - - @SuppressWarnings("unchecked") - public void addStatusMessage(String msg) { - statusMessages.put(msg, DataImporter.DATE_TIME_FORMAT.get().format(new Date())); - } - - @SuppressWarnings("unchecked") - private void buildDocument(VariableResolverImpl vr, DocWrapper doc, - Map pk, DataConfig.Entity entity, boolean isRoot, - ContextImpl parentCtx) { - - EntityProcessorWrapper entityProcessor = getEntityProcessor(entity); - - ContextImpl ctx = new ContextImpl(entity, vr, null, - pk == null ? 
Context.FULL_DUMP : Context.DELTA_DUMP, - session, parentCtx, this); - entityProcessor.init(ctx); - - if (requestParameters.start > 0) { - writer.log(SolrWriter.DISABLE_LOGGING, null, null); - } - - if (verboseDebug) { - writer.log(SolrWriter.START_ENTITY, entity.name, null); - } - - int seenDocCount = 0; - - try { - while (true) { - if (stop.get()) - return; - if(importStatistics.docCount.get() > (requestParameters.start + requestParameters.rows)) break; - try { - seenDocCount++; - - if (seenDocCount > requestParameters.start) { - writer.log(SolrWriter.ENABLE_LOGGING, null, null); - } - - if (verboseDebug && entity.isDocRoot) { - writer.log(SolrWriter.START_DOC, entity.name, null); - } - if (doc == null && entity.isDocRoot) { - doc = new DocWrapper(); - ctx.setDoc(doc); - DataConfig.Entity e = entity; - while (e.parentEntity != null) { - addFields(e.parentEntity, doc, (Map) vr - .resolve(e.parentEntity.name), vr); - e = e.parentEntity; - } - } - - Map arow = entityProcessor.nextRow(); - if (arow == null) { - break; - } - - // Support for start parameter in debug mode - if (entity.isDocRoot) { - if (seenDocCount <= requestParameters.start) - continue; - if (seenDocCount > requestParameters.start + requestParameters.rows) { - LOG.info("Indexing stopped at docCount = " + importStatistics.docCount); - break; - } - } - - if (verboseDebug) { - writer.log(SolrWriter.ENTITY_OUT, entity.name, arow); - } - importStatistics.rowsCount.incrementAndGet(); - if (doc != null) { - handleSpecialCommands(arow, doc); - addFields(entity, doc, arow, vr); - } - if (entity.entities != null) { - vr.addNamespace(entity.name, arow); - for (DataConfig.Entity child : entity.entities) { - buildDocument(vr, doc, null, child, false, ctx); - } - vr.removeNamespace(entity.name); - } - - if (entity.isDocRoot) { - if (stop.get()) - return; - if (!doc.isEmpty()) { - boolean result = writer.upload(doc); - doc = null; - if (result){ - importStatistics.docCount.incrementAndGet(); - } else { - importStatistics.failedDocCount.incrementAndGet(); - } - } - } - - } catch (DataImportHandlerException e) { - if (verboseDebug) { - writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, e); - } - if(e.getErrCode() == DataImportHandlerException.SKIP_ROW){ - continue; - } - if (isRoot) { - if (e.getErrCode() == DataImportHandlerException.SKIP) { - importStatistics.skipDocCount.getAndIncrement(); - } else { - LOG.error("Exception while processing: " - + entity.name + " document : " + doc, e); - } - if (e.getErrCode() == DataImportHandlerException.SEVERE) - throw e; - } else - throw e; - } catch (Throwable t) { - if (verboseDebug) { - writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, t); - } - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t); - } finally { - if (verboseDebug) { - writer.log(SolrWriter.ROW_END, entity.name, null); - if (entity.isDocRoot) - writer.log(SolrWriter.END_DOC, null, null); - } - } - } - } finally { - if (verboseDebug) { - writer.log(SolrWriter.END_ENTITY, null, null); - } - entityProcessor.destroy(); - } - } - - static class DocWrapper extends SolrInputDocument { - //final SolrInputDocument solrDocument = new SolrInputDocument(); - Map session; - - public void setSessionAttribute(String key, Object val){ - if(session == null) session = new HashMap(); - session.put(key, val); - } - - public Object getSessionAttribute(String key) { - return session == null ? 
null : session.get(key); - } - } - - private void handleSpecialCommands(Map arow, DocWrapper doc) { - Object value = arow.get("$deleteDocById"); - if (value != null) { - if (value instanceof Collection) { - Collection collection = (Collection) value; - for (Object o : collection) { - writer.deleteDoc(o.toString()); - } - } else { - writer.deleteDoc(value); - } - } - value = arow.get("$deleteDocByQuery"); - if (value != null) { - if (value instanceof Collection) { - Collection collection = (Collection) value; - for (Object o : collection) { - writer.deleteByQuery(o.toString()); - } - } else { - writer.deleteByQuery(value.toString()); - } - } - value = arow.get("$docBoost"); - if (value != null) { - float value1 = 1.0f; - if (value instanceof Number) { - value1 = ((Number) value).floatValue(); - } else { - value1 = Float.parseFloat(value.toString()); - } - doc.setDocumentBoost(value1); - } - - value = arow.get("$skipDoc"); - if (value != null) { - if (Boolean.parseBoolean(value.toString())) { - throw new DataImportHandlerException(DataImportHandlerException.SKIP, - "Document skipped :" + arow); - } - } - - value = arow.get("$skipRow"); - if (value != null) { - if (Boolean.parseBoolean(value.toString())) { - throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW); - } - } - } - - @SuppressWarnings("unchecked") - private void addFields(DataConfig.Entity entity, DocWrapper doc, - Map arow, VariableResolver vr) { - for (Map.Entry entry : arow.entrySet()) { - String key = entry.getKey(); - Object value = entry.getValue(); - if (value == null) continue; - if (key.startsWith("$")) continue; - List field = entity.colNameVsField.get(key); - if (field == null && dataImporter.getSchema() != null) { - // This can be a dynamic field or a field which does not have an entry in data-config ( an implicit field) - SchemaField sf = dataImporter.getSchema().getFieldOrNull(key); - if (sf == null) { - sf = dataImporter.getConfig().lowerNameVsSchemaField.get(key.toLowerCase()); - } - if (sf != null) { - addFieldToDoc(entry.getValue(), sf.getName(), 1.0f, sf.multiValued(), doc); - } - //else do nothing. 
if we add it it may fail - } else { - if (field != null) { - for (DataConfig.Field f : field) { - String name = f.getName(); - if(f.dynamicName){ - name = vr.replaceTokens(name); - } - if (f.toWrite) addFieldToDoc(entry.getValue(), name, f.boost, f.multiValued, doc); - } - } - } - } - } - - private void addFieldToDoc(Object value, String name, float boost, boolean multiValued, DocWrapper doc) { - if (value instanceof Collection) { - Collection collection = (Collection) value; - if (multiValued) { - for (Object o : collection) { - if (o != null) - doc.addField(name, o, boost); - } - } else { - if (doc.getField(name) == null) - for (Object o : collection) { - if (o != null) { - doc.addField(name, o, boost); - break; - } - } - } - } else if (multiValued) { - if (value != null) { - doc.addField(name, value, boost); - } - } else { - if (doc.getField(name) == null && value != null) - doc.addField(name, value, boost); - } - } - - private EntityProcessorWrapper getEntityProcessor(DataConfig.Entity entity) { - if (entity.processor != null) - return entity.processor; - EntityProcessor entityProcessor; - if (entity.proc == null) { - entityProcessor = new SqlEntityProcessor(); - } else { - try { - entityProcessor = (EntityProcessor) loadClass(entity.proc, dataImporter.getCore()) - .newInstance(); - } catch (Exception e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Unable to load EntityProcessor implementation for entity:" - + entity.name, e); - } - } - return entity.processor = new EntityProcessorWrapper(entityProcessor, this); - } - - /** - *

- * <p> Collects unique keys of all Solr documents for whom one or more source tables have been changed since the last
- * indexed time. </p>
- * <p> Note: In our definition, unique key of Solr document is the primary key of the top level
- * entity (unless skipped using docRoot=false) in the Solr document in data-config.xml </p>
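- * <p>
- * Illustrative sketch of the bookkeeping this performs (hypothetical string
- * keys; the real code tracks key maps): keys reported as deleted are removed
- * from the modified set so they are not re-indexed, and are then deleted from
- * Solr.
- * </p>
- * <pre>
- * Set<String> modified = new HashSet<String>(Arrays.asList("1", "2", "3"));
- * Set<String> deleted = new HashSet<String>(Arrays.asList("2"));
- * modified.removeAll(deleted);   // only "1" and "3" are rebuilt
- * // each key left in 'deleted' goes to writer.deleteDoc(key)
- * </pre>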
- * - * @return an iterator to the list of keys for which Solr documents should be updated. - */ - @SuppressWarnings("unchecked") - public Set> collectDelta(DataConfig.Entity entity, VariableResolverImpl resolver, - Set> deletedRows) { - //someone called abort - if (stop.get()) - return new HashSet(); - - EntityProcessor entityProcessor = getEntityProcessor(entity); - ContextImpl context1 = new ContextImpl(entity, resolver, null, Context.FIND_DELTA, session, null, this); - entityProcessor.init(context1); - - Set> myModifiedPks = new HashSet>(); - - if (entity.entities != null) { - - for (DataConfig.Entity entity1 : entity.entities) { - //this ensures that we start from the leaf nodes - myModifiedPks.addAll(collectDelta(entity1, resolver, deletedRows)); - //someone called abort - if (stop.get()) - return new HashSet(); - } - - } - // identifying the modified rows for this entity - - Set> deltaSet = new HashSet>(); - LOG.info("Running ModifiedRowKey() for Entity: " + entity.name); - //get the modified rows in this entity - while (true) { - Map row = entityProcessor.nextModifiedRowKey(); - - if (row == null) - break; - - deltaSet.add(row); - importStatistics.rowsCount.incrementAndGet(); - // check for abort - if (stop.get()) - return new HashSet(); - } - //get the deleted rows for this entity - Set> deletedSet = new HashSet>(); - Set> deltaRemoveSet = new HashSet>(); - while (true) { - Map row = entityProcessor.nextDeletedRowKey(); - if (row == null) - break; - - //Check to see if this delete is in the current delta set - for (Map modifiedRow : deltaSet) { - if (modifiedRow.get(entity.getPk()).equals(row.get(entity.getPk()))) { - deltaRemoveSet.add(modifiedRow); - } - } - - deletedSet.add(row); - importStatistics.rowsCount.incrementAndGet(); - // check for abort - if (stop.get()) - return new HashSet(); - } - - //asymmetric Set difference - deltaSet.removeAll(deltaRemoveSet); - - LOG.info("Completed ModifiedRowKey for Entity: " + entity.name + " rows obtained : " + deltaSet.size()); - LOG.info("Completed DeletedRowKey for Entity: " + entity.name + " rows obtained : " + deletedSet.size()); - - myModifiedPks.addAll(deltaSet); - Set> parentKeyList = new HashSet>(); - //all that we have captured is useless (in a sub-entity) if no rows in the parent is modified because of these - //propogate up the changes in the chain - if (entity.parentEntity != null) { - // identifying deleted rows with deltas - - for (Map row : myModifiedPks) { - getModifiedParentRows(resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList); - // check for abort - if (stop.get()) - return new HashSet(); - } - // running the same for deletedrows - for (Map row : deletedSet) { - getModifiedParentRows(resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList); - // check for abort - if (stop.get()) - return new HashSet(); - } - } - LOG.info("Completed parentDeltaQuery for Entity: " + entity.name); - if (entity.isDocRoot) - deletedRows.addAll(deletedSet); - - return entity.isDocRoot ? 
myModifiedPks : new HashSet>( - parentKeyList); - } - - private void getModifiedParentRows(VariableResolverImpl resolver, - String entity, EntityProcessor entityProcessor, - Set> parentKeyList) { - try { - while (true) { - Map parentRow = entityProcessor - .nextModifiedParentRowKey(); - if (parentRow == null) - break; - - parentKeyList.add(parentRow); - importStatistics.rowsCount.incrementAndGet(); - // check for abort - if (stop.get()) - return; - } - - } finally { - resolver.removeNamespace(entity); - } - } - - public void abort() { - stop.set(true); - } - - private AtomicBoolean stop = new AtomicBoolean(false); - - public static final String TIME_ELAPSED = "Time Elapsed"; - - static String getTimeElapsedSince(long l) { - l = System.currentTimeMillis() - l; - return (l / (60000 * 60)) % 60 + ":" + (l / 60000) % 60 + ":" + (l / 1000) - % 60 + "." + l % 1000; - } - - @SuppressWarnings("unchecked") - static Class loadClass(String name, SolrCore core) throws ClassNotFoundException { - try { - return core != null ? - core.getResourceLoader().findClass(name) : - Class.forName(name); - } catch (Exception e) { - try { - String n = DocBuilder.class.getPackage().getName() + "." + name; - return core != null ? - core.getResourceLoader().findClass(n) : - Class.forName(n); - } catch (Exception e1) { - throw new ClassNotFoundException("Unable to load " + name + " or " + DocBuilder.class.getPackage().getName() + "." + name, e); - } - } - } - - public static class Statistics { - public AtomicLong docCount = new AtomicLong(); - - public AtomicLong deletedDocCount = new AtomicLong(); - - public AtomicLong failedDocCount = new AtomicLong(); - - public AtomicLong rowsCount = new AtomicLong(); - - public AtomicLong queryCount = new AtomicLong(); - - public AtomicLong skipDocCount = new AtomicLong(); - - public Statistics add(Statistics stats) { - this.docCount.addAndGet(stats.docCount.get()); - this.deletedDocCount.addAndGet(stats.deletedDocCount.get()); - this.rowsCount.addAndGet(stats.rowsCount.get()); - this.queryCount.addAndGet(stats.queryCount.get()); - - return this; - } - - public Map getStatsSnapshot() { - Map result = new HashMap(); - result.put("docCount", docCount.get()); - result.put("deletedDocCount", deletedDocCount.get()); - result.put("rowCount", rowsCount.get()); - result.put("queryCount", rowsCount.get()); - result.put("skipDocCount", skipDocCount.get()); - return result; - } - - } - - private void cleanByQuery(String delQuery, AtomicBoolean completeCleanDone) { - delQuery = getVariableResolver().replaceTokens(delQuery); - if (requestParameters.clean) { - if (delQuery == null && !completeCleanDone.get()) { - writer.doDeleteAll(); - completeCleanDone.set(true); - } else if (delQuery != null) { - writer.deleteByQuery(delQuery); - } - } - } - - public static final String LAST_INDEX_TIME = "last_index_time"; - public static final String INDEX_START_TIME = "index_start_time"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,291 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or 
more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; - -/** - *

- * <p> Base class for all implementations of EntityProcessor </p>
- * <p> Most implementations of EntityProcessor
- * extend this base class which provides common functionality.
- * </p>
- * <p/>
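- * <p>
- * A minimal subclass sketch (hypothetical class): override nextRow() only,
- * returning one row per call and null at end of data, as the nextRow() contract
- * documented below requires.
- * </p>
- * <pre>
- * public class SingleRowProcessor extends EntityProcessorBase {
- *   private boolean done = false;   // a real processor would reset this in init(Context)
- *
- *   public Map<String, Object> nextRow() {
- *     if (done) return null;        // null signals end of rows
- *     done = true;
- *     Map<String, Object> row = new HashMap<String, Object>();
- *     row.put("id", "1");
- *     return row;
- *   }
- * }
- * </pre>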
- * This API is experimental and subject to change - * - * @version $Id: EntityProcessorBase.java 816082 2009-09-17 07:41:14Z noble $ - * @since solr 1.3 - */ -public class EntityProcessorBase extends EntityProcessor { - private static final Logger log = LoggerFactory.getLogger(EntityProcessorBase.class); - - protected boolean isFirstInit = true; - - protected String entityName; - - protected Context context; - - protected VariableResolverImpl resolver; - - protected Iterator> rowIterator; - - protected List transformers; - - protected String query; - - protected String onError = ABORT; - - - public void init(Context context) { - rowIterator = null; - this.context = context; - resolver = (VariableResolverImpl) context.getVariableResolver(); - if (isFirstInit) { - firstInit(context); - } - query = null; - } - - /**first time init call. do one-time operations here - */ - protected void firstInit(Context context) { - entityName = context.getEntityAttribute("name"); - String s = context.getEntityAttribute(ON_ERROR); - if (s != null) onError = s; - isFirstInit = false; - } - - - protected Map getNext() { - try { - if (rowIterator == null) - return null; - if (rowIterator.hasNext()) - return rowIterator.next(); - query = null; - rowIterator = null; - return null; - } catch (Exception e) { - log.error("getNext() failed for query '" + query + "'", e); - query = null; - rowIterator = null; - wrapAndThrow(DataImportHandlerException.WARN, e); - return null; - } - } - - public Map nextModifiedRowKey() { - return null; - } - - public Map nextDeletedRowKey() { - return null; - } - - public Map nextModifiedParentRowKey() { - return null; - } - - /** - * For a simple implementation, this is the only method that the sub-class should implement. This is intended to - * stream rows one-by-one. Return null to signal end of rows - * - * @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return - * null to signal end of rows - */ - public Map nextRow() { - return null;// do not do anything - } - - - public void destroy() { - /*no op*/ - } - - /** - * Only used by cache implementations - */ - protected String cachePk; - - /** - * Only used by cache implementations - */ - protected String cacheVariableName; - - /** - * Only used by cache implementations - */ - protected Map>> simpleCache; - - /** - * Only used by cache implementations - */ - protected Map>>> cacheWithWhereClause; - - protected List> dataSourceRowCache; - - /** - * Only used by cache implementations - */ - protected void cacheInit() { - if (simpleCache != null || cacheWithWhereClause != null) - return; - String where = context.getEntityAttribute("where"); - - String cacheKey = context.getEntityAttribute(CACHE_KEY); - String lookupKey = context.getEntityAttribute(CACHE_LOOKUP); - if(cacheKey != null && lookupKey == null){ - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "'cacheKey' is specified for the entity "+ entityName+" but 'cacheLookup' is missing" ); - - } - if (where == null && cacheKey == null) { - simpleCache = new HashMap>>(); - } else { - if (where != null) { - String[] splits = where.split("="); - cachePk = splits[0]; - cacheVariableName = splits[1].trim(); - } else { - cachePk = cacheKey; - cacheVariableName = lookupKey; - } - cacheWithWhereClause = new HashMap>>>(); - } - } - - /** - * If the where clause is present the cache is sql Vs Map of key Vs List of Rows. Only used by cache implementations. 
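- * <p>
- * The two cache shapes used by this class, with their element types restored
- * for clarity (sketch):
- * </p>
- * <pre>
- * // with a where clause: query -> (row id -> rows)
- * Map<String, Map<Object, List<Map<String, Object>>>> cacheWithWhereClause;
- * // without one: query -> rows
- * Map<String, List<Map<String, Object>>> simpleCache;
- * </pre>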
- * - * @param query the query string for which cached data is to be returned - * - * @return the cached row corresponding to the given query after all variables have been resolved - */ - protected Map getIdCacheData(String query) { - Map>> rowIdVsRows = cacheWithWhereClause - .get(query); - List> rows = null; - Object key = resolver.resolve(cacheVariableName); - if (key == null) { - throw new DataImportHandlerException(DataImportHandlerException.WARN, - "The cache lookup value : " + cacheVariableName + " is resolved to be null in the entity :" + - context.getEntityAttribute("name")); - - } - if (rowIdVsRows != null) { - rows = rowIdVsRows.get(key); - if (rows == null) - return null; - dataSourceRowCache = new ArrayList>(rows); - return getFromRowCacheTransformed(); - } else { - rows = getAllNonCachedRows(); - if (rows.isEmpty()) { - return null; - } else { - rowIdVsRows = new HashMap>>(); - for (Map row : rows) { - Object k = row.get(cachePk); - if (k == null) { - throw new DataImportHandlerException(DataImportHandlerException.WARN, - "No value available for the cache key : " + cachePk + " in the entity : " + - context.getEntityAttribute("name")); - } - if (!k.getClass().equals(key.getClass())) { - throw new DataImportHandlerException(DataImportHandlerException.WARN, - "The key in the cache type : " + k.getClass().getName() + - "is not same as the lookup value type " + key.getClass().getName() + " in the entity " + - context.getEntityAttribute("name")); - } - if (rowIdVsRows.get(k) == null) - rowIdVsRows.put(k, new ArrayList>()); - rowIdVsRows.get(k).add(row); - } - cacheWithWhereClause.put(query, rowIdVsRows); - if (!rowIdVsRows.containsKey(key)) - return null; - dataSourceRowCache = new ArrayList>(rowIdVsRows.get(key)); - if (dataSourceRowCache.isEmpty()) { - dataSourceRowCache = null; - return null; - } - return getFromRowCacheTransformed(); - } - } - } - - /** - *

- * <p> Get all the rows from the datasource for the given query. Only used by cache implementations. </p>
- * <p>
This - * must be implemented by sub-classes which intend to provide a cached implementation - * - * @return the list of all rows fetched from the datasource. - */ - protected List> getAllNonCachedRows() { - return Collections.EMPTY_LIST; - } - - /** - * If where clause is not present the cache is a Map of query vs List of Rows. Only used by cache implementations. - * - * @param query string for which cached row is to be returned - * - * @return the cached row corresponding to the given query - */ - protected Map getSimpleCacheData(String query) { - List> rows = simpleCache.get(query); - if (rows != null) { - dataSourceRowCache = new ArrayList>(rows); - return getFromRowCacheTransformed(); - } else { - rows = getAllNonCachedRows(); - if (rows.isEmpty()) { - return null; - } else { - dataSourceRowCache = new ArrayList>(rows); - simpleCache.put(query, rows); - return getFromRowCacheTransformed(); - } - } - } - - protected Map getFromRowCacheTransformed() { - Map r = dataSourceRowCache.remove(0); - if (dataSourceRowCache.isEmpty()) - dataSourceRowCache = null; - return r; - } - - public static final String TRANSFORMER = "transformer"; - - public static final String TRANSFORM_ROW = "transformRow"; - - public static final String ON_ERROR = "onError"; - - public static final String ABORT = "abort"; - - public static final String CONTINUE = "continue"; - - public static final String SKIP = "skip"; - - public static final String SKIP_DOC = "$skipDoc"; - - public static final String CACHE_KEY = "cacheKey"; - - public static final String CACHE_LOOKUP = "cacheLookup"; - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessor.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.Map; - -/** - *

- * <p>
- * An instance of entity processor serves an entity. It is reused throughout the
- * import process.
- * </p>
- * <p/>
- * <p>
- * Implementations of this abstract class must provide a public no-args constructor.
- * </p>
- * <p/>
- * <p>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * </p>
- * <p/>
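- * <p>
- * Sketch of the pull loop DocBuilder runs against a processor (illustration
- * only; the real loop in DocBuilder.buildDocument adds transformation, debug
- * logging and error handling):
- * </p>
- * <pre>
- * processor.init(context);                 // once per (re)visit of the entity
- * Map<String, Object> row;
- * while ((row = processor.nextRow()) != null) {
- *   // each row contributes fields to the Solr document being built
- * }
- * processor.destroy();
- * </pre>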
- * This API is experimental and may change in the future. - * - * @version $Id: EntityProcessor.java 824359 2009-10-12 14:31:54Z ehatcher $ - * @since solr 1.3 - */ -public abstract class EntityProcessor { - - /** - * This method is called when it starts processing an entity. When it comes - * back to the entity it is called again. So it can reset anything at that point. - * For a rootmost entity this is called only once for an ingestion. For sub-entities , this - * is called multiple once for each row from its parent entity - * - * @param context The current context - */ - public abstract void init(Context context); - - /** - * This method helps streaming the data for each row . The implementation - * would fetch as many rows as needed and gives one 'row' at a time. Only this - * method is used during a full import - * - * @return A 'row'. The 'key' for the map is the column name and the 'value' - * is the value of that column. If there are no more rows to be - * returned, return 'null' - */ - public abstract Map nextRow(); - - /** - * This is used for delta-import. It gives the pks of the changed rows in this - * entity - * - * @return the pk vs value of all changed rows - */ - public abstract Map nextModifiedRowKey(); - - /** - * This is used during delta-import. It gives the primary keys of the rows - * that are deleted from this entity. If this entity is the root entity, solr - * document is deleted. If this is a sub-entity, the Solr document is - * considered as 'changed' and will be recreated - * - * @return the pk vs value of all changed rows - */ - public abstract Map nextDeletedRowKey(); - - /** - * This is used during delta-import. This gives the primary keys and their - * values of all the rows changed in a parent entity due to changes in this - * entity. - * - * @return the pk vs value of all changed rows in the parent entity - */ - public abstract Map nextModifiedParentRowKey(); - - /** - * Invoked for each parent-row after the last row for this entity is processed. If this is the root-most - * entity, it will be called only once in the import, at the very end. - * - */ - public abstract void destroy(); - - /** - * Invoked after the transformers are invoked. EntityProcessors can add, remove or modify values - * added by Transformers in this method. - * - * @param r The transformed row - * @since solr 1.4 - */ - public void postTransform(Map r) { - } - - /** - * Invoked when the Entity processor is destroyed towards the end of import. - * - * @since solr 1.4 - */ - public void close() { - //no-op - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,286 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.*; -import static org.apache.solr.handler.dataimport.EntityProcessorBase.*; -import static org.apache.solr.handler.dataimport.EntityProcessorBase.SKIP; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -/** - * A Wrapper over EntityProcessor instance which performs transforms and handles multi-row outputs correctly. - * - * @version $Id: EntityProcessorWrapper.java 813676 2009-09-11 06:34:35Z noble $ - * @since solr 1.4 - */ -public class EntityProcessorWrapper extends EntityProcessor { - private static final Logger log = LoggerFactory.getLogger(EntityProcessorWrapper.class); - - private EntityProcessor delegate; - private DocBuilder docBuilder; - - private String onError; - private Context context; - private VariableResolverImpl resolver; - private String entityName; - - protected List transformers; - - protected List> rowcache; - - private Context contextCopy; - - public EntityProcessorWrapper(EntityProcessor delegate, DocBuilder docBuilder) { - this.delegate = delegate; - this.docBuilder = docBuilder; - } - - public void init(Context context) { - rowcache = null; - this.context = context; - resolver = (VariableResolverImpl) context.getVariableResolver(); - //context has to be set correctly . 
keep the copy of the old one so that it can be restored in destroy - contextCopy = resolver.context; - resolver.context = context; - if (entityName == null) { - onError = resolver.replaceTokens(context.getEntityAttribute(ON_ERROR)); - if (onError == null) onError = ABORT; - entityName = context.getEntityAttribute(DataConfig.NAME); - } - delegate.init(context); - - } - - @SuppressWarnings("unchecked") - void loadTransformers() { - String transClasses = context.getEntityAttribute(TRANSFORMER); - - if (transClasses == null) { - transformers = Collections.EMPTY_LIST; - return; - } - - String[] transArr = transClasses.split(","); - transformers = new ArrayList() { - public boolean add(Transformer transformer) { - if (docBuilder != null && docBuilder.verboseDebug) { - transformer = docBuilder.writer.getDebugLogger().wrapTransformer(transformer); - } - return super.add(transformer); - } - }; - for (String aTransArr : transArr) { - String trans = aTransArr.trim(); - if (trans.startsWith("script:")) { - String functionName = trans.substring("script:".length()); - ScriptTransformer scriptTransformer = new ScriptTransformer(); - scriptTransformer.setFunctionName(functionName); - transformers.add(scriptTransformer); - continue; - } - try { - Class clazz = DocBuilder.loadClass(trans, context.getSolrCore()); - if (clazz.newInstance() instanceof Transformer) { - transformers.add((Transformer) clazz.newInstance()); - } else { - final Method meth = clazz.getMethod(TRANSFORM_ROW, Map.class); - if (meth == null) { - String msg = "Transformer :" - + trans - + "does not implement Transformer interface or does not have a transformRow(Map m)method"; - log.error(msg); - throw new DataImportHandlerException( - SEVERE, msg); - } - transformers.add(new ReflectionTransformer(meth, clazz, trans)); - } - } catch (Exception e) { - log.error("Unable to load Transformer: " + aTransArr, e); - throw new DataImportHandlerException(SEVERE, - e); - } - } - - } - - @SuppressWarnings("unchecked") - static class ReflectionTransformer extends Transformer { - final Method meth; - - final Class clazz; - - final String trans; - - final Object o; - - public ReflectionTransformer(Method meth, Class clazz, String trans) - throws Exception { - this.meth = meth; - this.clazz = clazz; - this.trans = trans; - o = clazz.newInstance(); - } - - public Object transformRow(Map aRow, Context context) { - try { - return meth.invoke(o, aRow); - } catch (Exception e) { - log.warn("method invocation failed on transformer : " + trans, e); - throw new DataImportHandlerException(WARN, e); - } - } - } - - protected Map getFromRowCache() { - Map r = rowcache.remove(0); - if (rowcache.isEmpty()) - rowcache = null; - return r; - } - - @SuppressWarnings("unchecked") - protected Map applyTransformer(Map row) { - if(row == null) return null; - if (transformers == null) - loadTransformers(); - if (transformers == Collections.EMPTY_LIST) - return row; - Map transformedRow = row; - List> rows = null; - boolean stopTransform = checkStopTransform(row); - for (Transformer t : transformers) { - if (stopTransform) break; - try { - if (rows != null) { - List> tmpRows = new ArrayList>(); - for (Map map : rows) { - resolver.addNamespace(entityName, map); - Object o = t.transformRow(map, context); - if (o == null) - continue; - if (o instanceof Map) { - Map oMap = (Map) o; - stopTransform = checkStopTransform(oMap); - tmpRows.add((Map) o); - } else if (o instanceof List) { - tmpRows.addAll((List) o); - } else { - log.error("Transformer must return Map or a List>"); - } - 
} - rows = tmpRows; - } else { - resolver.addNamespace(entityName, transformedRow); - Object o = t.transformRow(transformedRow, context); - if (o == null) - return null; - if (o instanceof Map) { - Map oMap = (Map) o; - stopTransform = checkStopTransform(oMap); - transformedRow = (Map) o; - } else if (o instanceof List) { - rows = (List) o; - } else { - log.error("Transformer must return Map or a List>"); - } - } - } catch (Exception e) { - log.warn("transformer threw error", e); - if (ABORT.equals(onError)) { - wrapAndThrow(SEVERE, e); - } else if (SKIP.equals(onError)) { - wrapAndThrow(DataImportHandlerException.SKIP, e); - } - // onError = continue - } - } - if (rows == null) { - return transformedRow; - } else { - rowcache = rows; - return getFromRowCache(); - } - - } - - private boolean checkStopTransform(Map oMap) { - return oMap.get("$stopTransform") != null - && Boolean.parseBoolean(oMap.get("$stopTransform").toString()); - } - - public Map nextRow() { - if (rowcache != null) { - return getFromRowCache(); - } - while (true) { - Map arow = delegate.nextRow(); - if (arow == null) { - return null; - } else { - arow = applyTransformer(arow); - if (arow != null) { - delegate.postTransform(arow); - return arow; - } - } - } - } - - public Map nextModifiedRowKey() { - Map row = delegate.nextModifiedRowKey(); - row = applyTransformer(row); - rowcache = null; - return row; - } - - public Map nextDeletedRowKey() { - Map row = delegate.nextDeletedRowKey(); - row = applyTransformer(row); - rowcache = null; - return row; - } - - public Map nextModifiedParentRowKey() { - return delegate.nextModifiedParentRowKey(); - } - - public void destroy() { - delegate.destroy(); - resolver.context = contextCopy; - contextCopy = null; - } - - public VariableResolverImpl getVariableResolver() { - return resolver; - } - - public Context getContext() { - return context; - } - - @Override - public void close() { - delegate.close(); - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,301 +0,0 @@ -package org.apache.solr.handler.dataimport; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.solr.core.SolrCore; -import static org.apache.solr.handler.dataimport.DataConfig.CLASS; -import static org.apache.solr.handler.dataimport.DataConfig.NAME; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DocBuilder.loadClass; -import org.apache.solr.util.DateMathParser; -import org.apache.solr.client.solrj.util.ClientUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.net.URLEncoder; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - *

Holds definitions for evaluators provided by DataImportHandler

Refer to http://wiki.apache.org/solr/DataImportHandler for more - * details.
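A note on how these evaluators are reached at runtime: an expression such as encodeUrl(item.id) arrives as a single string, and the FORMAT_METHOD pattern declared just below splits it into a function name and a raw argument list before dispatch through the functions namespace. A minimal standalone sketch of that split (class name and sample expression are illustrative only):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class FormatMethodSketch {
  // Same regex as EvaluatorBag.FORMAT_METHOD below: captures "name(args)".
  static final Pattern FORMAT_METHOD = Pattern.compile("^(\\w*?)\\((.*?)\\)$");

  public static void main(String[] args) {
    Matcher m = FORMAT_METHOD.matcher("encodeUrl(item.id)");
    if (m.find()) {
      System.out.println(m.group(1)); // prints: encodeUrl
      System.out.println(m.group(2)); // prints: item.id
    }
  }
}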

- *

- * This API is experimental and may change in the future. - * - * @version $Id: EvaluatorBag.java 757682 2009-03-24 08:09:49Z shalin $ - * @since solr 1.3 - */ -public class EvaluatorBag { - private static final Logger LOG = LoggerFactory.getLogger(EvaluatorBag.class); - - public static final String DATE_FORMAT_EVALUATOR = "formatDate"; - - public static final String URL_ENCODE_EVALUATOR = "encodeUrl"; - - public static final String ESCAPE_SOLR_QUERY_CHARS = "escapeQueryChars"; - - public static final String SQL_ESCAPE_EVALUATOR = "escapeSql"; - static final Pattern FORMAT_METHOD = Pattern - .compile("^(\\w*?)\\((.*?)\\)$"); - - /** - *

Returns an Evaluator instance meant to be used for escaping values in SQL queries.

It - * escapes the value of the given expression by replacing all occurrences of single-quotes with two single-quotes, and - * similarly for double-quotes.
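The doubling rule is small enough to show in isolation; a hedged sketch (class and method names are mine, not part of EvaluatorBag):

public class SqlEscapeSketch {
  // Same replacement the evaluator applies: '' for ' and "" for ".
  static String escapeSql(String s) {
    return s.replaceAll("'", "''").replaceAll("\"", "\"\"");
  }

  public static void main(String[] args) {
    System.out.println(escapeSql("O'Reilly")); // prints: O''Reilly
  }
}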

- * - * @return an Evaluator instance capable of SQL-escaping expressions. - */ - public static Evaluator getSqlEscapingEvaluator() { - return new Evaluator() { - public String evaluate(String expression, Context context) { - List l = parseParams(expression, context.getVariableResolver()); - if (l.size() != 1) { - throw new DataImportHandlerException(SEVERE, "'escapeSql' must have at least one parameter "); - } - String s = l.get(0).toString(); - return s.replaceAll("'", "''").replaceAll("\"", "\"\""); - } - }; - } - - /** - *

Returns an Evaluator instance meant to be used for escaping reserved characters in Solr - * queries
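The heavy lifting is delegated to SolrJ's ClientUtils, as the @see tag below notes; a minimal sketch of calling that utility directly (assumes the solr-solrj jar on the classpath):

import org.apache.solr.client.solrj.util.ClientUtils;

public class QueryEscapeSketch {
  public static void main(String[] args) {
    // Backslash-escapes Lucene/Solr query metacharacters such as : + - ( ) and *.
    System.out.println(ClientUtils.escapeQueryChars("a:b (c+d)"));
  }
}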

- * - * @return an Evaluator instance capable of escaping reserved characters in solr queries. - * - * @see org.apache.solr.client.solrj.util.ClientUtils#escapeQueryChars(String) - */ - public static Evaluator getSolrQueryEscapingEvaluator() { - return new Evaluator() { - public String evaluate(String expression, Context context) { - List l = parseParams(expression, context.getVariableResolver()); - if (l.size() != 1) { - throw new DataImportHandlerException(SEVERE, "'escapeQueryChars' must have at least one parameter "); - } - String s = l.get(0).toString(); - return ClientUtils.escapeQueryChars(s); - } - }; - } - - /** - *

Returns an Evaluator instance capable of URL-encoding expressions. The expressions are evaluated - * using a VariableResolver
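Once the expression has been resolved to a plain string, the evaluator is a thin wrapper over java.net.URLEncoder; a minimal sketch:

import java.net.URLEncoder;

public class UrlEncodeSketch {
  public static void main(String[] args) throws Exception {
    // The same call the evaluator makes on the resolved value.
    System.out.println(URLEncoder.encode("a b&c", "UTF-8")); // prints: a+b%26c
  }
}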

- * - * @return an Evaluator instance capable of URL-encoding expressions. - */ - public static Evaluator getUrlEvaluator() { - return new Evaluator() { - public String evaluate(String expression, Context context) { - List l = parseParams(expression, context.getVariableResolver()); - if (l.size() != 1) { - throw new DataImportHandlerException(SEVERE, "'encodeUrl' must have at least one parameter "); - } - String s = l.get(0).toString(); - - try { - return URLEncoder.encode(s.toString(), "UTF-8"); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Unable to encode expression: " + expression + " with value: " + s); - return null; - } - } - }; - } - - /** - *

Returns an Evaluator instance capable of formatting values using a given date format.

- * The value to be formatted can be an entity.field or a date expression parsed with the DateMathParser class. - * If the value is a String, it is assumed to be a DateMath expression; otherwise it is resolved using a - * VariableResolver instance.
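A hedged sketch of the terminal step only: applying the caller-supplied pattern to an already-resolved Date (the DateMath branch is omitted, since it needs Solr's DateMathParser):

import java.text.SimpleDateFormat;
import java.util.Date;

public class FormatDateSketch {
  public static void main(String[] args) {
    // Corresponds to formatDate(x.lastModified, 'yyyy-MM-dd') once the
    // first argument has resolved to a java.util.Date.
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
    System.out.println(fmt.format(new Date()));
  }
}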

- * - * @return an Evaluator instance capable of formatting values to a given date format - * - * @see DateMathParser - */ - public static Evaluator getDateFormatEvaluator() { - return new Evaluator() { - public String evaluate(String expression, Context context) { - List l = parseParams(expression, context.getVariableResolver()); - if (l.size() != 2) { - throw new DataImportHandlerException(SEVERE, "'formatDate()' must have two parameters "); - } - Object o = l.get(0); - Object format = l.get(1); - if (format instanceof VariableWrapper) { - VariableWrapper wrapper = (VariableWrapper) format; - o = wrapper.resolve(); - if (o == null) { - format = wrapper.varName; - LOG.warn("Deprecated syntax used. The syntax of formatDate has been changed to formatDate(, ''). " + - "The old syntax will stop working in Solr 1.5"); - } else { - format = o.toString(); - } - } - String dateFmt = format.toString(); - SimpleDateFormat fmt = new SimpleDateFormat(dateFmt); - Date date = null; - if (o instanceof VariableWrapper) { - VariableWrapper variableWrapper = (VariableWrapper) o; - Object variableval = variableWrapper.resolve(); - if (variableval instanceof Date) { - date = (Date) variableval; - } else { - String s = variableval.toString(); - try { - date = DataImporter.DATE_TIME_FORMAT.get().parse(s); - } catch (ParseException exp) { - wrapAndThrow(SEVERE, exp, "Invalid expression for date"); - } - } - } else { - String datemathfmt = o.toString(); - datemathfmt = datemathfmt.replaceAll("NOW", ""); - try { - date = dateMathParser.parseMath(datemathfmt); - } catch (ParseException e) { - wrapAndThrow(SEVERE, e, "Invalid expression for date"); - } - } - return fmt.format(date); - } - - }; - } - - static Map getFunctionsNamespace(final List> fn, DocBuilder docBuilder) { - final Map evaluators = new HashMap(); - evaluators.put(DATE_FORMAT_EVALUATOR, getDateFormatEvaluator()); - evaluators.put(SQL_ESCAPE_EVALUATOR, getSqlEscapingEvaluator()); - evaluators.put(URL_ENCODE_EVALUATOR, getUrlEvaluator()); - evaluators.put(ESCAPE_SOLR_QUERY_CHARS, getSolrQueryEscapingEvaluator()); - SolrCore core = docBuilder == null ? null : docBuilder.dataImporter.getCore(); - for (Map map : fn) { - try { - evaluators.put(map.get(NAME), (Evaluator) loadClass(map.get(CLASS), core).newInstance()); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Unable to instantiate evaluator: " + map.get(CLASS)); - } - } - - return new HashMap() { - @Override - public String get(Object key) { - if (key == null) - return null; - Matcher m = FORMAT_METHOD.matcher((String) key); - if (!m.find()) - return null; - String fname = m.group(1); - Evaluator evaluator = evaluators.get(fname); - if (evaluator == null) - return null; - VariableResolverImpl vri = VariableResolverImpl.CURRENT_VARIABLE_RESOLVER.get(); - Context ctx = vri == null ? null : vri.context; - return evaluator.evaluate(m.group(2), ctx); - } - - }; - } - - /** - * Parses a string of expression into separate params. The values are separated by commas. 
each value will be - * translated into one of the following: - * <ol> - * <li>If it is in single quotes, the value will be translated to a String</li> - * <li>If it is not in quotes and is a number, it will be translated into a Double</li> - * <li>Otherwise it is a variable which can be resolved, and it will be put in as an instance of VariableWrapper</li> - * </ol> - * - * @param expression the expression to be parsed - * @param vr the VariableResolver instance for resolving variables - * - * @return a List of objects which can either be a string, number or a variable wrapper - */ - public static List parseParams(String expression, VariableResolver vr) { - List result = new ArrayList(); - expression = expression.trim(); - String[] ss = expression.split(","); - for (int i = 0; i < ss.length; i++) { - ss[i] = ss[i].trim(); - if (ss[i].startsWith("'")) {//a string param has started - StringBuilder sb = new StringBuilder(); - while (true) { - sb.append(ss[i]); - if (ss[i].endsWith("'")) break; - i++; - if (i >= ss.length) - throw new DataImportHandlerException(SEVERE, "invalid string at " + ss[i - 1] + " in function params: " + expression); - sb.append(","); - } - String s = sb.substring(1, sb.length() - 1); - s = s.replaceAll("\\\\'", "'"); - result.add(s); - } else { - if (Character.isDigit(ss[i].charAt(0))) { - try { - Double doub = Double.parseDouble(ss[i]); - result.add(doub); - } catch (NumberFormatException e) { - if (vr.resolve(ss[i]) == null) { - wrapAndThrow( - SEVERE, e, "Invalid number: " + ss[i] + - " in parameters " + expression); - } - } - } else { - result.add(new VariableWrapper(ss[i], vr)); - } - } - } - return result; - } - - public static class VariableWrapper { - String varName; - VariableResolver vr; - - public VariableWrapper(String s, VariableResolver vr) { - this.varName = s; - this.vr = vr; - } - - public Object resolve() { - return vr.resolve(varName); - - } - - public String toString() { - Object o = vr.resolve(varName); - return o == null ? null : o.toString(); - - } - } - - static Pattern IN_SINGLE_QUOTES = Pattern.compile("^'(.*?)'$"); - - static DateMathParser dateMathParser = new DateMathParser(TimeZone - .getDefault(), Locale.getDefault()); - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Evaluator.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Evaluator.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Evaluator.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Evaluator.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ -package org.apache.solr.handler.dataimport; - -/** - *

- * Pluggable functions for resolving variables - *

- *

- * Implementations of this abstract class must provide a public no-arg constructor. - *
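Since the contract is just a public no-arg constructor plus the single evaluate(String, Context) method shown further below, a custom evaluator stays small. A hypothetical example (class name and behavior are mine; the registration shown in the comment follows the DIH wiki's data-config syntax, and the body is simplified to treat the whole argument as one variable name):

package org.apache.solr.handler.dataimport;

// Hypothetical: lower-cases whatever the expression resolves to. Registered
// in data-config.xml via <function name="lowerCase" class="...LowerCaseEvaluator"/>
// and then invoked as ${dataimporter.functions.lowerCase(item.name)}.
public class LowerCaseEvaluator extends Evaluator {
  public LowerCaseEvaluator() {} // public no-arg constructor, as required

  public String evaluate(String expression, Context context) {
    Object resolved = context.getVariableResolver().resolve(expression);
    return resolved == null ? null : resolved.toString().toLowerCase();
  }
}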

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- * This API is experimental and may change in the future. - * - * @version $Id: Evaluator.java 745734 2009-02-19 05:28:48Z shalin $ - * @since solr 1.3 - */ -public abstract class Evaluator { - - /** - * Return a String after processing an expression and a VariableResolver - * - * @see org.apache.solr.handler.dataimport.VariableResolver - * @param expression string to be evaluated - * @param context instance - * @return the value of the given expression evaluated using the resolver - */ - public abstract String evaluate(String expression, Context context); -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EventListener.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EventListener.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EventListener.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EventListener.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -/** - * Event listener for DataImportHandler - * - * This API is experimental and subject to change - * - * @version $Id: EventListener.java 757332 2009-03-23 07:14:19Z shalin $ - * @since solr 1.4 - */ -public interface EventListener { - - /** - * Event callback - * - * @param ctx the Context in which this event was called - */ - public void onEvent(Context ctx); - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,136 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.*; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.lang.reflect.Modifier; -import java.sql.Blob; -import java.sql.Clob; -import java.util.Properties; - -/** - * This can be useful for users who have a DB field containing xml and wish to use a nested XPathEntityProcessor - *

- * The datasource may be configured as follows: - *

- * <datasource name="f1" type="FieldReaderDataSource" /> - *

- * The entity which uses this datasource must keep the url value as the variable name, e.g. url="field-name" - *

- * The fieldname must be resolvable from VariableResolver - *
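A hedged sketch of that resolution step (the XML value and field name are hypothetical): getData() below looks the configured dataField up through the enclosing wrapper's VariableResolver and wraps String results in a StringReader.

import java.io.Reader;
import java.io.StringReader;

public class FieldReaderSketch {
  public static void main(String[] args) {
    // Hypothetical resolved value: in real use, getData() obtains this by
    // resolving the configured dataField (e.g. "parent.xmlColumn") through
    // the enclosing EntityProcessorWrapper's VariableResolver.
    Object o = "<root><id>1</id></root>";

    // String values are wrapped directly; Clob/Blob values take the
    // reflective getCharacterStream/getBinaryStream branches shown below.
    Reader reader = (o instanceof String) ? new StringReader((String) o) : null;
    System.out.println(reader != null); // prints: true
  }
}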

- * This may be used with any EntityProcessor which uses a DataSource, e.g. XPathEntityProcessor. - *

- * Supports String, BLOB, CLOB data types and there is an extra field (in the entity) 'encoding' for BLOB types - * - * @version $Id: FieldReaderDataSource.java 813676 2009-09-11 06:34:35Z noble $ - * @since 1.4 - */ -public class FieldReaderDataSource extends DataSource { - private static final Logger LOG = LoggerFactory.getLogger(FieldReaderDataSource.class); - protected VariableResolver vr; - protected String dataField; - private String encoding; - private EntityProcessorWrapper entityProcessor; - - public void init(Context context, Properties initProps) { - dataField = context.getEntityAttribute("dataField"); - encoding = context.getEntityAttribute("encoding"); - entityProcessor = (EntityProcessorWrapper) context.getEntityProcessor(); - /*no op*/ - } - - public Reader getData(String query) { - Object o = entityProcessor.getVariableResolver().resolve(dataField); - if (o == null) { - throw new DataImportHandlerException (SEVERE, "No field available for name : " +dataField); - } - if (o instanceof String) { - return new StringReader((String) o); - } else if (o instanceof Clob) { - Clob clob = (Clob) o; - try { - //Most of the JDBC drivers have getCharacterStream defined as public - // so let us just check it - return readCharStream(clob); - } catch (Exception e) { - LOG.info("Unable to get data from CLOB"); - return null; - - } - - } else if (o instanceof Blob) { - Blob blob = (Blob) o; - try { - //Most of the JDBC drivers have getBinaryStream defined as public - // so let us just check it - Method m = blob.getClass().getDeclaredMethod("getBinaryStream"); - if (Modifier.isPublic(m.getModifiers())) { - return getReader(m, blob); - } else { - // force invoke - m.setAccessible(true); - return getReader(m, blob); - } - } catch (Exception e) { - LOG.info("Unable to get data from BLOB"); - return null; - - } - } else { - return new StringReader(o.toString()); - } - - } - - static Reader readCharStream(Clob clob) { - try { - Method m = clob.getClass().getDeclaredMethod("getCharacterStream"); - if (Modifier.isPublic(m.getModifiers())) { - return (Reader) m.invoke(clob); - } else { - // force invoke - m.setAccessible(true); - return (Reader) m.invoke(clob); - } - } catch (Exception e) { - wrapAndThrow(SEVERE, e,"Unable to get reader from clob"); - return null;//unreachable - } - } - - private Reader getReader(Method m, Blob blob) - throws IllegalAccessException, InvocationTargetException, UnsupportedEncodingException { - InputStream is = (InputStream) m.invoke(blob); - if (encoding == null) { - return (new InputStreamReader(is)); - } else { - return (new InputStreamReader(is, encoding)); - } - } - - public void close() { - - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,125 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.*; -import java.util.Properties; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - *

- * A DataSource which reads from local files - *

- *

- * The file is read with the default platform encoding. This can be overridden by - * specifying the encoding in solrconfig.xml. - *

- *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @version $Id: FileDataSource.java 812122 2009-09-07 13:12:01Z shalin $ - * @since solr 1.3 - */ -public class FileDataSource extends DataSource { - public static final String BASE_PATH = "basePath"; - - /** - * The basePath for this data source - */ - protected String basePath; - - /** - * The encoding using which the given file should be read - */ - protected String encoding = null; - - private static final Logger LOG = LoggerFactory.getLogger(FileDataSource.class); - - public void init(Context context, Properties initProps) { - basePath = initProps.getProperty(BASE_PATH); - if (initProps.get(URLDataSource.ENCODING) != null) - encoding = initProps.getProperty(URLDataSource.ENCODING); - } - - /** - *

- * Returns a reader for the given file. - *

- *

- * If the given file is not absolute, we try to construct an absolute path - * using the basePath configuration. If that fails, the relative path is - * tried. If the file is not found, a RuntimeException is thrown.
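The fallback order is easy to misread; a hedged standalone sketch of the same decision (basePath and query values are illustrative):

import java.io.File;

public class BasePathSketch {
  public static void main(String[] args) {
    String basePath = "/var/data/";   // illustrative
    String query = "feeds/items.xml"; // illustrative

    File file = new File(query);
    if (!file.isAbsolute()) {
      file = new File(basePath + query); // try basePath first, as getData() does
    }
    // getData() falls back to the original relative path if this one is
    // unreadable, and throws (wrapped as a RuntimeException) otherwise.
    System.out.println(file.getPath());
  }
}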

- *

- * It is the responsibility of the calling method to properly close the - * returned Reader - *

- */ - public Reader getData(String query) { - try { - File file0 = new File(query); - File file = file0; - - if (!file.isAbsolute()) - file = new File(basePath + query); - - if (file.isFile() && file.canRead()) { - LOG.debug("Accessing File: " + file.toString()); - return openStream(file); - } else if (file != file0) - if (file0.isFile() && file0.canRead()) { - LOG.debug("Accessing File0: " + file0.toString()); - return openStream(file0); - } - - throw new FileNotFoundException("Could not find file: " + query); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } - } - - /** - * Open a {@link java.io.Reader} for the given file name - * - * @param file a {@link java.io.File} instance - * @return a Reader on the given file - * @throws FileNotFoundException if the File does not exist - * @throws UnsupportedEncodingException if the encoding is unsupported - * @since solr 1.4 - */ - protected Reader openStream(File file) throws FileNotFoundException, - UnsupportedEncodingException { - if (encoding == null) { - return new InputStreamReader(new FileInputStream(file)); - } else { - return new InputStreamReader(new FileInputStream(file), encoding); - } - } - - public void close() { - - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,292 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.File; -import java.io.FilenameFilter; -import java.text.ParseException; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - *

- * An EntityProcessor instance which streams file names found in a given base - * directory, matching patterns and returning rows containing file information. - *

- *

- *

- * It supports querying a given base directory by matching: - *

- * <ul>
- *   <li>regular expressions to file names</li>
- *   <li>excluding certain files based on regular expression</li>
- *   <li>last modification date (newer or older than a given date or time)</li>
- *   <li>size (bigger or smaller than size given in bytes)</li>
- *   <li>recursively iterating through sub-directories</li>
- * </ul>
- * Its output can be used along with FileDataSource to read from files in file - * systems. - *
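Worth flagging for readers skimming the class below: getFolderFiles() uses a FilenameFilter whose accept() always returns false, collecting matches purely as a side effect so that recursion and filtering happen in one pass. A stripped-down sketch of that idiom (the ".xml" suffix stands in for the fileName regex):

import java.io.File;
import java.io.FilenameFilter;
import java.util.ArrayList;
import java.util.List;

public class SideEffectFilterSketch {
  // Collects matching file paths while File.list() itself returns nothing,
  // mirroring the accept()-always-false idiom in getFolderFiles() below.
  static void walk(File dir, final List<String> out) {
    dir.list(new FilenameFilter() {
      public boolean accept(File d, String name) {
        File f = new File(d, name);
        if (f.isDirectory()) {
          walk(f, out);                       // recurse, as the 'recursive' option does
        } else if (name.endsWith(".xml")) {   // stand-in for the fileName pattern
          out.add(f.getAbsolutePath());
        }
        return false;                         // never populate the returned array
      }
    });
  }

  public static void main(String[] args) {
    List<String> hits = new ArrayList<String>();
    walk(new File("."), hits);
    System.out.println(hits.size() + " matches");
  }
}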

- *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @version $Id: FileListEntityProcessor.java 812122 2009-09-07 13:12:01Z shalin $ - * @since solr 1.3 - */ -public class FileListEntityProcessor extends EntityProcessorBase { - /** - * A regex pattern to identify files given in data-config.xml after resolving any variables - */ - protected String fileName; - - /** - * The baseDir given in data-config.xml after resolving any variables - */ - protected String baseDir; - - /** - * A Regex pattern of excluded file names as given in data-config.xml after resolving any variables - */ - protected String excludes; - - /** - * The newerThan given in data-config as a {@link java.util.Date} - *

- * Note: This variable is resolved just-in-time in the {@link #nextRow()} method. - *

- */ - protected Date newerThan; - - /** - * The olderThan given in data-config as a {@link java.util.Date} - */ - protected Date olderThan; - - /** - * The biggerThan given in data-config as a long value - *

- * Note: This variable is resolved just-in-time in the {@link #nextRow()} method. - *

- */ - protected long biggerThan = -1; - - /** - * The smallerThan given in data-config as a long value - *

- * Note: This variable is resolved just-in-time in the {@link #nextRow()} method. - *

- */ - protected long smallerThan = -1; - - /** - * The recursive given in data-config. Default value is false. - */ - protected boolean recursive = false; - - private Pattern fileNamePattern, excludesPattern; - - public void init(Context context) { - super.init(context); - fileName = context.getEntityAttribute(FILE_NAME); - if (fileName != null) { - fileName = resolver.replaceTokens(fileName); - fileNamePattern = Pattern.compile(fileName); - } - baseDir = context.getEntityAttribute(BASE_DIR); - if (baseDir == null) - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "'baseDir' is a required attribute"); - baseDir = resolver.replaceTokens(baseDir); - File dir = new File(baseDir); - if (!dir.isDirectory()) - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "'baseDir' value: " + baseDir + " is not a directory"); - - String r = context.getEntityAttribute(RECURSIVE); - if (r != null) - recursive = Boolean.parseBoolean(r); - excludes = context.getEntityAttribute(EXCLUDES); - if (excludes != null) { - excludes = resolver.replaceTokens(excludes); - excludesPattern = Pattern.compile(excludes); - } - } - - /** - * Get the Date object corresponding to the given string. - * - * @param dateStr the date string. It can be a DateMath string or it may have a evaluator function - * @return a Date instance corresponding to the input string - */ - private Date getDate(String dateStr) { - if (dateStr == null) - return null; - - Matcher m = PLACE_HOLDER_PATTERN.matcher(dateStr); - if (m.find()) { - Object o = resolver.resolve(m.group(1)); - if (o instanceof Date) return (Date)o; - dateStr = (String) o; - } else { - dateStr = resolver.replaceTokens(dateStr); - } - m = EvaluatorBag.IN_SINGLE_QUOTES.matcher(dateStr); - if (m.find()) { - String expr = null; - expr = m.group(1).replaceAll("NOW", ""); - try { - return EvaluatorBag.dateMathParser.parseMath(expr); - } catch (ParseException exp) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Invalid expression for date", exp); - } - } - try { - return DataImporter.DATE_TIME_FORMAT.get().parse(dateStr); - } catch (ParseException exp) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Invalid expression for date", exp); - } - } - - /** - * Get the Long value for the given string after resolving any evaluator or variable. 
- * - * @param sizeStr the size as a string - * @return the Long value corresponding to the given string - */ - private Long getSize(String sizeStr) { - if (sizeStr == null) - return null; - - Matcher m = PLACE_HOLDER_PATTERN.matcher(sizeStr); - if (m.find()) { - Object o = resolver.resolve(m.group(1)); - if (o instanceof Number) { - Number number = (Number) o; - return number.longValue(); - } - sizeStr = (String) o; - } else { - sizeStr = resolver.replaceTokens(sizeStr); - } - - return Long.parseLong(sizeStr); - } - - public Map nextRow() { - if (rowIterator != null) - return getNext(); - List> fileDetails = new ArrayList>(); - File dir = new File(baseDir); - - String dateStr = context.getEntityAttribute(NEWER_THAN); - newerThan = getDate(dateStr); - dateStr = context.getEntityAttribute(OLDER_THAN); - olderThan = getDate(dateStr); - String biggerThanStr = context.getEntityAttribute(BIGGER_THAN); - if (biggerThanStr != null) - biggerThan = getSize(biggerThanStr); - String smallerThanStr = context.getEntityAttribute(SMALLER_THAN); - if (smallerThanStr != null) - smallerThan = getSize(smallerThanStr); - - getFolderFiles(dir, fileDetails); - rowIterator = fileDetails.iterator(); - return getNext(); - } - - private void getFolderFiles(File dir, final List> fileDetails) { - // Fetch an array of file objects that pass the filter, however the - // returned array is never populated; accept() always returns false. - // Rather we make use of the fileDetails array which is populated as - // a side affect of the accept method. - dir.list(new FilenameFilter() { - public boolean accept(File dir, String name) { - File fileObj = new File(dir, name); - if (fileObj.isDirectory()) { - if (recursive) getFolderFiles(fileObj, fileDetails); - } else if (fileNamePattern == null) { - addDetails(fileDetails, dir, name); - } else if (fileNamePattern.matcher(name).find()) { - if (excludesPattern != null && excludesPattern.matcher(name).find()) - return false; - addDetails(fileDetails, dir, name); - } - return false; - } - }); - } - - private void addDetails(List> files, File dir, String name) { - Map details = new HashMap(); - File aFile = new File(dir, name); - if (aFile.isDirectory()) return; - long sz = aFile.length(); - Date lastModified = new Date(aFile.lastModified()); - if (biggerThan != -1 && sz <= biggerThan) - return; - if (smallerThan != -1 && sz >= smallerThan) - return; - if (olderThan != null && lastModified.after(olderThan)) - return; - if (newerThan != null && lastModified.before(newerThan)) - return; - details.put(DIR, dir.getAbsolutePath()); - details.put(FILE, name); - details.put(ABSOLUTE_FILE, aFile.getAbsolutePath()); - details.put(SIZE, sz); - details.put(LAST_MODIFIED, lastModified); - files.add(details); - } - - public static final Pattern PLACE_HOLDER_PATTERN = Pattern - .compile("\\$\\{(.*?)\\}"); - - public static final String DIR = "fileDir"; - - public static final String FILE = "file"; - - public static final String ABSOLUTE_FILE = "fileAbsolutePath"; - - public static final String SIZE = "fileSize"; - - public static final String LAST_MODIFIED = "fileLastModified"; - - public static final String FILE_NAME = "fileName"; - - public static final String BASE_DIR = "baseDir"; - - public static final String EXCLUDES = "excludes"; - - public static final String NEWER_THAN = "newerThan"; - - public static final String OLDER_THAN = "olderThan"; - - public static final String BIGGER_THAN = "biggerThan"; - - public static final String SMALLER_THAN = "smallerThan"; - - public static final String 
RECURSIVE = "recursive"; - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,98 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.analysis.HTMLStripCharFilter; -import org.apache.lucene.analysis.CharReader; - -import java.io.IOException; -import java.io.StringReader; -import java.io.BufferedReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * A Transformer implementation which strip off HTML tags using org.apache.solr.analysis.HTMLStripReader This is useful - * in case you don't need this HTML anyway. - * - * @version $Id: HTMLStripTransformer.java 819638 2009-09-28 16:44:26Z shalin $ - * @see org.apache.solr.analysis.HTMLStripCharFilter - * @since solr 1.4 - */ -public class HTMLStripTransformer extends Transformer { - - @Override - @SuppressWarnings("unchecked") - public Object transformRow(Map row, Context context) { - VariableResolver resolver = context.getVariableResolver(); - List> fields = context.getAllEntityFields(); - for (Map field : fields) { - String col = field.get(DataImporter.COLUMN); - String splitHTML = resolver.replaceTokens(field.get(STRIP_HTML)); - if (!TRUE.equals(splitHTML)) - continue; - Object tmpVal = row.get(col); - if (tmpVal == null) - continue; - - if (tmpVal instanceof List) { - List inputs = (List) tmpVal; - List results = new ArrayList(); - for (String input : inputs) { - if (input == null) - continue; - Object o = stripHTML(input, col); - if (o != null) - results.add(o); - } - row.put(col, results); - } else { - String value = tmpVal.toString(); - Object o = stripHTML(value, col); - if (o != null) - row.put(col, o); - } - } - return row; - } - - private Object stripHTML(String value, String column) { - StringBuilder out = new StringBuilder(); - StringReader strReader = new StringReader(value); - try { - HTMLStripCharFilter html = new HTMLStripCharFilter(CharReader.get(strReader.markSupported() ? 
strReader : new BufferedReader(strReader))); - char[] cbuf = new char[1024 * 10]; - while (true) { - int count = html.read(cbuf); - if (count == -1) - break; // end of stream mark is -1 - if (count > 0) - out.append(cbuf, 0, count); - } - html.close(); - } catch (IOException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Failed stripping HTML for column: " + column, e); - } - return out.toString(); - } - - public static final String STRIP_HTML = "stripHTML"; - - public static final String TRUE = "true"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HttpDataSource.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HttpDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HttpDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HttpDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.net.URL; -import java.net.URLConnection; -import java.util.Properties; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - *

- * A data source implementation which can be used to read character files using - * HTTP. - *

- *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @version $Id: HttpDataSource.java 766638 2009-04-20 10:12:50Z shalin $ - * @since solr 1.3 - * @deprecated use {@link org.apache.solr.handler.dataimport.URLDataSource} instead - */ -@Deprecated -public class HttpDataSource extends URLDataSource { - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,417 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.naming.InitialContext; -import java.sql.*; -import java.util.*; -import java.util.concurrent.Callable; - -/** - *

A DataSource implementation which can fetch data using JDBC.

Refer to http://wiki.apache.org/solr/DataImportHandler for more - * details.
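One detail in init() below deserves a note: batchSize="-1" is translated to Integer.MIN_VALUE, the JDBC fetch-size convention (notably honored by MySQL's Connector/J) for row-by-row streaming. A hedged sketch of configuring a plain JDBC statement the same way (URL, credentials and table are illustrative; a driver must be on the classpath):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class FetchSizeSketch {
  public static void main(String[] args) throws Exception {
    Connection c = DriverManager.getConnection("jdbc:mysql://localhost/db", "user", "pass");
    Statement stmt = c.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
    // batchSize="-1" in the data source config becomes this streaming hint:
    stmt.setFetchSize(Integer.MIN_VALUE);
    ResultSet rs = stmt.executeQuery("SELECT id, name FROM item");
    while (rs.next()) {
      System.out.println(rs.getString("name"));
    }
    rs.close();
    stmt.close();
    c.close();
  }
}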

- *

- * This API is experimental and may change in the future. - * - * @version $Id: JdbcDataSource.java 823398 2009-10-09 04:06:09Z noble $ - * @since solr 1.3 - */ -public class JdbcDataSource extends - DataSource>> { - private static final Logger LOG = LoggerFactory.getLogger(JdbcDataSource.class); - - protected Callable factory; - - private long connLastUsed = 0; - - private Connection conn; - - private Map fieldNameVsType = new HashMap(); - - private boolean convertType = false; - - private int batchSize = FETCH_SIZE; - - private int maxRows = 0; - - public void init(Context context, Properties initProps) { - Object o = initProps.get(CONVERT_TYPE); - if (o != null) - convertType = Boolean.parseBoolean(o.toString()); - - factory = createConnectionFactory(context, initProps); - - String bsz = initProps.getProperty("batchSize"); - if (bsz != null) { - bsz = context.getVariableResolver().replaceTokens(bsz); - try { - batchSize = Integer.parseInt(bsz); - if (batchSize == -1) - batchSize = Integer.MIN_VALUE; - } catch (NumberFormatException e) { - LOG.warn("Invalid batch size: " + bsz); - } - } - - for (Map map : context.getAllEntityFields()) { - String n = map.get(DataImporter.COLUMN); - String t = map.get(DataImporter.TYPE); - if ("sint".equals(t) || "integer".equals(t)) - fieldNameVsType.put(n, Types.INTEGER); - else if ("slong".equals(t) || "long".equals(t)) - fieldNameVsType.put(n, Types.BIGINT); - else if ("float".equals(t) || "sfloat".equals(t)) - fieldNameVsType.put(n, Types.FLOAT); - else if ("double".equals(t) || "sdouble".equals(t)) - fieldNameVsType.put(n, Types.DOUBLE); - else if ("date".equals(t)) - fieldNameVsType.put(n, Types.DATE); - else if ("boolean".equals(t)) - fieldNameVsType.put(n, Types.BOOLEAN); - else if ("binary".equals(t)) - fieldNameVsType.put(n, Types.BLOB); - else - fieldNameVsType.put(n, Types.VARCHAR); - } - } - - protected Callable createConnectionFactory(final Context context, - final Properties initProps) { - final VariableResolver resolver = context.getVariableResolver(); - resolveVariables(resolver, initProps); - final String jndiName = initProps.getProperty(JNDI_NAME); - final String url = initProps.getProperty(URL); - final String driver = initProps.getProperty(DRIVER); - - if (url == null && jndiName == null) - throw new DataImportHandlerException(SEVERE, - "JDBC URL or JNDI name has to be specified"); - - if (driver != null) { - try { - DocBuilder.loadClass(driver, context.getSolrCore()); - } catch (ClassNotFoundException e) { - wrapAndThrow(SEVERE, e, "Could not load driver: " + driver); - } - } else { - if(jndiName == null){ - throw new DataImportHandlerException(SEVERE, "One of driver or jndiName must be specified in the data source"); - } - } - - String s = initProps.getProperty("maxRows"); - if (s != null) { - maxRows = Integer.parseInt(s); - } - - return factory = new Callable() { - public Connection call() throws Exception { - // Resolve variables again because the variables may have changed - resolveVariables(resolver, initProps); - LOG.info("Creating a connection for entity " - + context.getEntityAttribute(DataImporter.NAME) + " with URL: " - + url); - long start = System.currentTimeMillis(); - Connection c = null; - try { - if(url != null){ - c = DriverManager.getConnection(url, initProps); - } else if(jndiName != null){ - InitialContext ctx = new InitialContext(); - Object jndival = ctx.lookup(jndiName); - if (jndival instanceof javax.sql.DataSource) { - javax.sql.DataSource dataSource = (javax.sql.DataSource) jndival; - String user = 
(String) initProps.get("user"); - String pass = (String) initProps.get("password"); - if(user == null || user.trim().equals("")){ - c = dataSource.getConnection(); - } else { - c = dataSource.getConnection(user, pass); - } - } else { - throw new DataImportHandlerException(SEVERE, - "the jndi name : '"+jndiName +"' is not a valid javax.sql.DataSource"); - } - } - } catch (SQLException e) { - // DriverManager does not allow you to use a driver which is not loaded through - // the class loader of the class which is trying to make the connection. - // This is a workaround for cases where the user puts the driver jar in the - // solr.home/lib or solr.home/core/lib directories. - Driver d = (Driver) DocBuilder.loadClass(driver, context.getSolrCore()).newInstance(); - c = d.connect(url, initProps); - } - if (c != null) { - if (Boolean.parseBoolean(initProps.getProperty("readOnly"))) { - c.setReadOnly(true); - // Add other sane defaults - c.setAutoCommit(true); - c.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED); - c.setHoldability(ResultSet.CLOSE_CURSORS_AT_COMMIT); - } - if (!Boolean.parseBoolean(initProps.getProperty("autoCommit"))) { - c.setAutoCommit(false); - } - String transactionIsolation = initProps.getProperty("transactionIsolation"); - if ("TRANSACTION_READ_UNCOMMITTED".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED); - } else if ("TRANSACTION_READ_COMMITTED".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED); - } else if ("TRANSACTION_REPEATABLE_READ".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_REPEATABLE_READ); - } else if ("TRANSACTION_SERIALIZABLE".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_SERIALIZABLE); - } else if ("TRANSACTION_NONE".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_NONE); - } - String holdability = initProps.getProperty("holdability"); - if ("CLOSE_CURSORS_AT_COMMIT".equals(holdability)) { - c.setHoldability(ResultSet.CLOSE_CURSORS_AT_COMMIT); - } else if ("HOLD_CURSORS_OVER_COMMIT".equals(holdability)) { - c.setHoldability(ResultSet.HOLD_CURSORS_OVER_COMMIT); - } - } - LOG.info("Time taken for getConnection(): " - + (System.currentTimeMillis() - start)); - return c; - } - }; - } - - private void resolveVariables(VariableResolver resolver, Properties initProps) { - for (Map.Entry entry : initProps.entrySet()) { - if (entry.getValue() != null) { - entry.setValue(resolver.replaceTokens((String) entry.getValue())); - } - } - } - - public Iterator> getData(String query) { - ResultSetIterator r = new ResultSetIterator(query); - return r.getIterator(); - } - - private void logError(String msg, Exception e) { - LOG.warn(msg, e); - } - - private List readFieldNames(ResultSetMetaData metaData) - throws SQLException { - List colNames = new ArrayList(); - int count = metaData.getColumnCount(); - for (int i = 0; i < count; i++) { - colNames.add(metaData.getColumnLabel(i + 1)); - } - return colNames; - } - - private class ResultSetIterator { - ResultSet resultSet; - - Statement stmt = null; - - List colNames; - - Iterator> rSetIterator; - - public ResultSetIterator(String query) { - - try { - Connection c = getConnection(); - stmt = c.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); - stmt.setFetchSize(batchSize); - stmt.setMaxRows(maxRows); - LOG.debug("Executing SQL: " + query); - long start = System.currentTimeMillis(); - 
-        if (stmt.execute(query)) {
-          resultSet = stmt.getResultSet();
-        }
-        LOG.trace("Time taken for sql :"
-                + (System.currentTimeMillis() - start));
-        colNames = readFieldNames(resultSet.getMetaData());
-      } catch (Exception e) {
-        wrapAndThrow(SEVERE, e, "Unable to execute query: " + query);
-      }
-      if (resultSet == null) {
-        rSetIterator = new ArrayList<Map<String, Object>>().iterator();
-        return;
-      }
-
-      rSetIterator = new Iterator<Map<String, Object>>() {
-        public boolean hasNext() {
-          return hasnext();
-        }
-
-        public Map<String, Object> next() {
-          return getARow();
-        }
-
-        public void remove() {/* do nothing */
-        }
-      };
-    }
-
-    private Iterator<Map<String, Object>> getIterator() {
-      return rSetIterator;
-    }
-
-    private Map<String, Object> getARow() {
-      if (resultSet == null)
-        return null;
-      Map<String, Object> result = new HashMap<String, Object>();
-      for (String colName : colNames) {
-        try {
-          if (!convertType) {
-            // Use underlying database's type information
-            result.put(colName, resultSet.getObject(colName));
-            continue;
-          }
-
-          Integer type = fieldNameVsType.get(colName);
-          if (type == null)
-            type = Types.VARCHAR;
-          switch (type) {
-            case Types.INTEGER:
-              result.put(colName, resultSet.getInt(colName));
-              break;
-            case Types.FLOAT:
-              result.put(colName, resultSet.getFloat(colName));
-              break;
-            case Types.BIGINT:
-              result.put(colName, resultSet.getLong(colName));
-              break;
-            case Types.DOUBLE:
-              result.put(colName, resultSet.getDouble(colName));
-              break;
-            case Types.DATE:
-              result.put(colName, resultSet.getDate(colName));
-              break;
-            case Types.BOOLEAN:
-              result.put(colName, resultSet.getBoolean(colName));
-              break;
-            case Types.BLOB:
-              result.put(colName, resultSet.getBytes(colName));
-              break;
-            default:
-              result.put(colName, resultSet.getString(colName));
-              break;
-          }
-        } catch (SQLException e) {
-          logError("Error reading data ", e);
-          wrapAndThrow(SEVERE, e, "Error reading data from database");
-        }
-      }
-      return result;
-    }
-
-    private boolean hasnext() {
-      if (resultSet == null)
-        return false;
-      try {
-        if (resultSet.next()) {
-          return true;
-        } else {
-          close();
-          return false;
-        }
-      } catch (SQLException e) {
-        close();
-        wrapAndThrow(SEVERE, e);
-        return false;
-      }
-    }
-
-    private void close() {
-      try {
-        if (resultSet != null)
-          resultSet.close();
-        if (stmt != null)
-          stmt.close();
-      } catch (Exception e) {
-        logError("Exception while closing result set", e);
-      } finally {
-        resultSet = null;
-        stmt = null;
-      }
-    }
-  }
-
-  private Connection getConnection() throws Exception {
-    long currTime = System.currentTimeMillis();
-    if (currTime - connLastUsed > CONN_TIME_OUT) {
-      synchronized (this) {
-        Connection tmpConn = factory.call();
-        closeConnection();
-        connLastUsed = System.currentTimeMillis();
-        return conn = tmpConn;
-      }
-
-    } else {
-      connLastUsed = currTime;
-      return conn;
-    }
-  }
-
-  protected void finalize() throws Throwable {
-    try {
-      if (!isClosed) {
-        LOG.error("JdbcDataSource was not closed prior to finalize(), indicates a bug -- POSSIBLE RESOURCE LEAK!!!");
-        close();
-      }
-    } finally {
-      super.finalize();
-    }
-  }
-
-  private boolean isClosed = false;
-
-  public void close() {
-    try {
-      closeConnection();
-    } finally {
-      isClosed = true;
-    }
-  }
-
-  private void closeConnection() {
-    try {
-      if (conn != null) {
-        conn.close();
-      }
-    } catch (Exception e) {
-      LOG.error("Ignoring Error when closing connection", e);
-    }
-  }
-
-  private static final long CONN_TIME_OUT = 10 * 1000; // 10 seconds
-
-  private static final int FETCH_SIZE = 500;
-
-  public static final String URL = "url";
-
-  public static final String JNDI_NAME = "jndiName";
-
-  public static final String DRIVER = "driver";
-
-  public static final String CONVERT_TYPE = "convertType";
-}
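The connection handling in the JdbcDataSource hunk above reopens the cached JDBC connection only after it has sat idle longer than CONN_TIME_OUT. A minimal stand-alone sketch of that reuse-or-reopen pattern; ConnectionHolder and its factory are illustrative names, not part of the Solr source:

import java.sql.Connection;
import java.util.concurrent.Callable;

// Hypothetical stand-alone illustration of the reuse-or-reopen pattern above;
// 'factory' stands in for the Callable<Connection> that JdbcDataSource builds in init().
class ConnectionHolder {
  private static final long CONN_TIME_OUT = 10 * 1000; // 10 seconds, as in JdbcDataSource
  private final Callable<Connection> factory;
  private Connection conn;
  private long connLastUsed = 0;

  ConnectionHolder(Callable<Connection> factory) {
    this.factory = factory;
  }

  synchronized Connection get() throws Exception {
    long now = System.currentTimeMillis();
    if (conn == null || now - connLastUsed > CONN_TIME_OUT) {
      // idle too long: open a fresh connection, then drop the stale one
      Connection fresh = factory.call();
      if (conn != null) {
        try { conn.close(); } catch (Exception ignored) {}
      }
      conn = fresh;
    }
    connLastUsed = System.currentTimeMillis();
    return conn;
  }
}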

diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java
--- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java	2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java	1970-01-01 00:00:00.000000000 +0000
@@ -1,156 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.io.*;
-import java.util.*;
-import java.util.regex.Pattern;
-
-import org.apache.commons.io.IOUtils;
-
-
-/**
- * <p>
- * An EntityProcessor instance which can stream lines of text read from a
- * datasource. Options allow lines to be explicitly skipped or included in the index.
- * </p>
- * <p/>
- * <p>
- * Attribute summary
- * <ul>
- * <li>url is the required location of the input file. If this value is
- * relative, it is assumed to be relative to baseLoc.</li>
- * <li>acceptLineRegex is an optional attribute that, if present, discards any
- * line which does not match the regExp.</li>
- * <li>skipLineRegex is an optional attribute that is applied after any
- * acceptLineRegex and discards any line which matches this regExp.</li>
- * </ul>
- * <p/>
- * Although envisioned for reading lines from a file or url, LineEntityProcessor may also be useful
- * for dealing with change lists, where each line contains filenames which can be used by subsequent entities
- * to parse content from those files.
- * <p/>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * <p/>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @version $Id: LineEntityProcessor.java 816042 2009-09-17 04:00:41Z noble $
- * @since solr 1.4
- */
-public class LineEntityProcessor extends EntityProcessorBase {
-  private Pattern acceptLineRegex, skipLineRegex;
-  private String url;
-  private BufferedReader reader;
-
-  /**
-   * Parses each of the entity attributes.
-   */
-  public void init(Context context) {
-    super.init(context);
-    String s;
-
-    // init a regex to locate files from the input we want to index
-    s = context.getResolvedEntityAttribute(ACCEPT_LINE_REGEX);
-    if (s != null) {
-      acceptLineRegex = Pattern.compile(s);
-    }
-
-    // init a regex to locate files from the input to be skipped
-    s = context.getResolvedEntityAttribute(SKIP_LINE_REGEX);
-    if (s != null) {
-      skipLineRegex = Pattern.compile(s);
-    }
-
-    // the FileName is required.
-    url = context.getResolvedEntityAttribute(URL);
-    if (url == null) throw
-        new DataImportHandlerException(DataImportHandlerException.SEVERE,
-            "'" + URL + "' is a required attribute");
-  }
-
-
-  /**
-   * Reads lines from the url until it finds a line that matches the
-   * optional acceptLineRegex and does not match the optional skipLineRegex.
-   *
-   * @return A row containing a minimum of one field "rawLine" or null to signal
-   *         end of file. The rawLine is the line as returned by readLine()
-   *         from the url. However, transformers can be used to create as
-   *         many other fields as required.
-   */
-  public Map<String, Object> nextRow() {
-    if (reader == null) {
-      reader = new BufferedReader((Reader) context.getDataSource().getData(url));
-    }
-
-    String line;
-
-    while ( true ) {
-      // read a line from the input file
-      try {
-        line = reader.readLine();
-      }
-      catch (IOException exp) {
-        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
-            "Problem reading from input", exp);
-      }
-
-      if (line == null) return null; // end of input
-
-      // First scan whole line to see if we want it
-      if (acceptLineRegex != null && ! acceptLineRegex.matcher(line).find()) continue;
-      if (skipLineRegex != null && skipLineRegex.matcher(line).find()) continue;
-      // Construct the 'row' of fields
-      Map<String, Object> row = new HashMap<String, Object>();
-      row.put("rawLine", line);
-      return row;
-    }
-  }
-
-  @Override
-  public void destroy() {
-    if (reader != null) {
-      IOUtils.closeQuietly(reader);
-    }
-    reader = null;
-    super.destroy();
-  }
-
-  /**
-   * Holds the name of entity attribute that will be parsed to obtain
-   * the filename containing the changelist.
-   */
-  public static final String URL = "url";
-
-  /**
-   * Holds the name of entity attribute that will be parsed to obtain
-   * the pattern to be used when checking to see if a line should
-   * be returned.
-   */
-  public static final String ACCEPT_LINE_REGEX = "acceptLineRegex";
-
-  /**
-   * Holds the name of entity attribute that will be parsed to obtain
-   * the pattern to be used when checking to see if a line should
-   * be ignored.
-   */
-  public static final String SKIP_LINE_REGEX = "skipLineRegex";
-}
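nextRow() above reduces line filtering to two regex find() checks per line: the line must match acceptLineRegex (when set) and must not match skipLineRegex. A self-contained toy version, with invented class name, patterns, and input:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.regex.Pattern;

// Toy driver for the accept/skip filtering used by nextRow() above.
public class LineFilterDemo {
  public static void main(String[] args) throws IOException {
    Pattern acceptLineRegex = Pattern.compile("\\.xml$"); // keep only .xml paths
    Pattern skipLineRegex = Pattern.compile("^#");        // drop comment lines
    BufferedReader reader = new BufferedReader(
        new StringReader("a.xml\n#b.xml\nc.txt\nd.xml\n"));
    String line;
    while ((line = reader.readLine()) != null) {
      if (!acceptLineRegex.matcher(line).find()) continue;
      if (skipLineRegex.matcher(line).find()) continue;
      System.out.println("rawLine=" + line); // prints a.xml and d.xml
    }
  }
}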

diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java
--- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java	2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java	1970-01-01 00:00:00.000000000 +0000
@@ -1,67 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Map;
-
-/**
- * A Transformer implementation which logs messages in a given template format.
- * <p/>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * <p/>
- * This API is experimental and may change in the future. - * - * @version $Id: LogTransformer.java 755132 2009-03-17 06:42:33Z shalin $ - * @since solr 1.4 - */ -public class LogTransformer extends Transformer { - Logger LOG = LoggerFactory.getLogger(LogTransformer.class); - - public Object transformRow(Map row, Context context) { - VariableResolver vr = context.getVariableResolver(); - String expr = context.getEntityAttribute(LOG_TEMPLATE); - String level = vr.replaceTokens(context.getEntityAttribute(LOG_LEVEL)); - - if (expr == null || level == null) return row; - - if ("info".equals(level)) { - if (LOG.isInfoEnabled()) - LOG.info(vr.replaceTokens(expr)); - } else if ("trace".equals(level)) { - if (LOG.isTraceEnabled()) - LOG.trace(vr.replaceTokens(expr)); - } else if ("warn".equals(level)) { - if (LOG.isWarnEnabled()) - LOG.warn(vr.replaceTokens(expr)); - } else if ("error".equals(level)) { - if (LOG.isErrorEnabled()) - LOG.error(vr.replaceTokens(expr)); - } else if ("debug".equals(level)) { - if (LOG.isDebugEnabled()) - LOG.debug(vr.replaceTokens(expr)); - } - - return row; - } - - public static final String LOG_TEMPLATE = "logTemplate"; - public static final String LOG_LEVEL = "logLevel"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,59 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; - -/** - *

- * <p>
- * A mock DataSource implementation which can be used for testing.
- * </p>
- * <p/>
- * This API is experimental and may change in the future. - * - * @version $Id: MockDataSource.java 681182 2008-07-30 19:35:58Z shalin $ - * @since solr 1.3 - */ -public class MockDataSource extends - DataSource>> { - - private static Map>> cache = new HashMap>>(); - - public static void setIterator(String query, - Iterator> iter) { - cache.put(query, iter); - } - - public static void clearCache() { - cache.clear(); - } - - public void init(Context context, Properties initProps) { - } - - public Iterator> getData(String query) { - return cache.get(query); - } - - public void close() { - cache.clear(); - - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,141 +0,0 @@ -package org.apache.solr.handler.dataimport; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.text.NumberFormat; -import java.text.ParseException; -import java.text.ParsePosition; -import java.util.ArrayList; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - *

- * <p>
- * A Transformer instance which can extract numbers out of strings. It uses the
- * java.text.NumberFormat class to parse strings and supports
- * Number, Integer, Currency and Percent styles as supported by
- * java.text.NumberFormat, with configurable locales.
- * </p>
- * <p/>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * <p/>
- * This API is experimental and may change in the future. - * - * @version $Id: NumberFormatTransformer.java 741435 2009-02-06 06:51:50Z shalin $ - * @since solr 1.3 - */ -public class NumberFormatTransformer extends Transformer { - - private static final Pattern localeRegex = Pattern.compile("^([a-z]{2})-([A-Z]{2})$"); - - @SuppressWarnings("unchecked") - public Object transformRow(Map row, Context context) { - VariableResolver resolver = context.getVariableResolver(); - for (Map fld : context.getAllEntityFields()) { - String style = resolver.replaceTokens(fld.get(FORMAT_STYLE)); - if (style != null) { - String column = fld.get(DataImporter.COLUMN); - String srcCol = fld.get(RegexTransformer.SRC_COL_NAME); - Locale locale = null; - String localeStr = resolver.replaceTokens(fld.get(LOCALE)); - if (srcCol == null) - srcCol = column; - if (localeStr != null) { - Matcher matcher = localeRegex.matcher(localeStr); - if (matcher.find() && matcher.groupCount() == 2) { - locale = new Locale(matcher.group(1), matcher.group(2)); - } else { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified for field: " + fld); - } - } else { - locale = Locale.getDefault(); - } - - Object val = row.get(srcCol); - String styleSmall = style.toLowerCase(); - - if (val instanceof List) { - List inputs = (List) val; - List results = new ArrayList(); - for (String input : inputs) { - try { - results.add(process(input, styleSmall, locale)); - } catch (ParseException e) { - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, - "Failed to apply NumberFormat on column: " + column, e); - } - } - row.put(column, results); - } else { - if (val == null || val.toString().trim().equals("")) - continue; - try { - row.put(column, process(val.toString(), styleSmall, locale)); - } catch (ParseException e) { - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, - "Failed to apply NumberFormat on column: " + column, e); - } - } - } - } - return row; - } - - private Number process(String val, String style, Locale locale) throws ParseException { - if (INTEGER.equals(style)) { - return parseNumber(val, NumberFormat.getIntegerInstance(locale)); - } else if (NUMBER.equals(style)) { - return parseNumber(val, NumberFormat.getNumberInstance(locale)); - } else if (CURRENCY.equals(style)) { - return parseNumber(val, NumberFormat.getCurrencyInstance(locale)); - } else if (PERCENT.equals(style)) { - return parseNumber(val, NumberFormat.getPercentInstance(locale)); - } - - return null; - } - - private Number parseNumber(String val, NumberFormat numFormat) throws ParseException { - ParsePosition parsePos = new ParsePosition(0); - Number num = numFormat.parse(val, parsePos); - if (parsePos.getIndex() != val.length()) { - throw new ParseException("illegal number format", parsePos.getIndex()); - } - return num; - } - - public static final String FORMAT_STYLE = "formatStyle"; - - public static final String LOCALE = "locale"; - - public static final String NUMBER = "number"; - - public static final String PERCENT = "percent"; - - public static final String INTEGER = "integer"; - - public static final String CURRENCY = "currency"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java --- 
solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,84 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.Reader; -import java.io.StringWriter; -import java.util.HashMap; -import java.util.Map; - -/** - *

- * <p>An implementation of EntityProcessor which reads data from a url/file and gives out a row which contains one String
- * value. The name of the field is 'plainText'.
- *
- * @version $Id: PlainTextEntityProcessor.java 766608 2009-04-20 07:36:55Z shalin $
- * @since solr 1.4
- */
-public class PlainTextEntityProcessor extends EntityProcessorBase {
-  private static final Logger LOG = LoggerFactory.getLogger(PlainTextEntityProcessor.class);
-  private boolean ended = false;
-
-  public void init(Context context) {
-    super.init(context);
-    ended = false;
-  }
-
-  public Map<String, Object> nextRow() {
-    if (ended) return null;
-    DataSource<Reader> ds = context.getDataSource();
-    String url = context.getVariableResolver().replaceTokens(context.getEntityAttribute(URL));
-    Reader r = null;
-    try {
-      r = ds.getData(url);
-    } catch (Exception e) {
-      if (ABORT.equals(onError)) {
-        wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
-      }
-      return null;
-    }
-    StringWriter sw = new StringWriter();
-    char[] buf = new char[1024];
-    while (true) {
-      int len = 0;
-      try {
-        len = r.read(buf);
-      } catch (IOException e) {
-        if (ABORT.equals(onError)) {
-          wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
-        } else {
-          LOG.warn("IOException while reading from data source", e);
-          return null;
-        }
-      }
-      if (len <= 0) break;
-      sw.append(new String(buf, 0, len));
-    }
-    Map<String, Object> row = new HashMap<String, Object>();
-    row.put(PLAIN_TEXT, sw.toString());
-    ended = true;
-    return row;
-  }
-
-  public static final String PLAIN_TEXT = "plainText";
-}
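The heart of nextRow() above is a buffered copy from the datasource Reader into a StringWriter. The same loop as a runnable stand-alone sketch; readAll and the sample input are invented for illustration:

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;

// Stand-alone version of the buffered copy loop in nextRow() above.
public class ReadAllDemo {
  static String readAll(Reader r) throws IOException {
    StringWriter sw = new StringWriter();
    char[] buf = new char[1024];
    int len;
    while ((len = r.read(buf)) > 0) {
      sw.write(buf, 0, len); // append only the chars actually read
    }
    return sw.toString();
  }

  public static void main(String[] args) throws IOException {
    System.out.println(readAll(new StringReader("plain text body")));
  }
}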

diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java
--- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java	2009-12-15 10:08:27.000000000 +0000
+++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java	1970-01-01 00:00:00.000000000 +0000
@@ -1,204 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * <p>
- * A Transformer implementation which uses Regular Expressions to extract, split
- * and replace data in fields.
- * </p>
- * <p/>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * <p/>
- * This API is experimental and may change in the future. - * - * @version $Id: RegexTransformer.java 823798 2009-10-10 06:13:55Z noble $ - * @since solr 1.3 - */ -public class RegexTransformer extends Transformer { - private static final Logger LOG = LoggerFactory.getLogger(RegexTransformer.class); - - @SuppressWarnings("unchecked") - public Map transformRow(Map row, - Context context) { - VariableResolver vr = context.getVariableResolver(); - List> fields = context.getAllEntityFields(); - for (Map field : fields) { - String col = field.get(DataImporter.COLUMN); - String reStr = field.get(REGEX); - reStr = vr.replaceTokens(reStr); - String splitBy = field.get(SPLIT_BY); - splitBy = vr.replaceTokens(splitBy); - String replaceWith = field.get(REPLACE_WITH); - replaceWith = vr.replaceTokens(replaceWith); - String groupNames = vr.replaceTokens(field.get(GROUP_NAMES)); - if (reStr != null || splitBy != null) { - String srcColName = field.get(SRC_COL_NAME); - if (srcColName == null) { - srcColName = col; - } - Object tmpVal = row.get(srcColName); - if (tmpVal == null) - continue; - - if (tmpVal instanceof List) { - List inputs = (List) tmpVal; - List results = new ArrayList(); - Map otherVars= null; - for (String input : inputs) { - Object o = process(col, reStr, splitBy, replaceWith, input, groupNames); - if (o != null){ - if (o instanceof Map) { - Map map = (Map) o; - for (Object e : map.entrySet()) { - Map.Entry entry = (Map.Entry) e; - List l = results; - if(!col.equals(entry.getKey())){ - if(otherVars == null) otherVars = new HashMap(); - l = otherVars.get(entry.getKey()); - if(l == null){ - l = new ArrayList(); - otherVars.put(entry.getKey(), l); - } - } - if (entry.getValue() instanceof Collection) { - l.addAll((Collection) entry.getValue()); - } else { - l.add(entry.getValue()); - } - } - } else { - if (o instanceof Collection) { - results.addAll((Collection) o); - } else { - results.add(o); - } - } - } - } - row.put(col, results); - if(otherVars != null) row.putAll(otherVars); - } else { - String value = tmpVal.toString(); - Object o = process(col, reStr, splitBy, replaceWith, value, groupNames); - if (o != null){ - if (o instanceof Map) { - row.putAll((Map) o); - } else{ - row.put(col, o); - } - } - } - } - } - return row; - } - - private Object process(String col, String reStr, String splitBy, - String replaceWith, String value, String groupNames) { - if (splitBy != null) { - return readBySplit(splitBy, value); - } else if (replaceWith != null) { - Pattern p = getPattern(reStr); - Matcher m = p.matcher(value); - return m.find()? 
m.replaceAll(replaceWith): null; - } else { - return readfromRegExp(reStr, value, col, groupNames); - } - } - - @SuppressWarnings("unchecked") - private List readBySplit(String splitBy, String value) { - String[] vals = value.split(splitBy); - List l = new ArrayList(); - l.addAll(Arrays.asList(vals)); - return l; - } - - @SuppressWarnings("unchecked") - private Object readfromRegExp(String reStr, String value, String columnName, String gNames) { - String[] groupNames = null; - if(gNames != null && gNames.trim().length() >0){ - groupNames = gNames.split(","); - } - Pattern regexp = getPattern(reStr); - Matcher m = regexp.matcher(value); - if (m.find() && m.groupCount() > 0) { - if (m.groupCount() > 1) { - List l = null; - Map map = null; - if(groupNames == null){ - l = new ArrayList(); - } else { - map = new HashMap(); - } - for (int i = 1; i <= m.groupCount(); i++) { - try { - if(l != null){ - l.add(m.group(i)); - } else if (map != null ){ - if(i <= groupNames.length){ - String nameOfGroup = groupNames[i-1]; - if(nameOfGroup != null && nameOfGroup.trim().length() >0){ - map.put(nameOfGroup, m.group(i)); - } - } - } - } catch (Exception e) { - LOG.warn("Parsing failed for field : " + columnName, e); - } - } - return l == null ? map: l; - } else { - return m.group(1); - } - } - - return null; - } - - private Pattern getPattern(String reStr) { - Pattern result = PATTERN_CACHE.get(reStr); - if (result == null) { - PATTERN_CACHE.put(reStr, result = Pattern.compile(reStr)); - } - return result; - } - - private HashMap PATTERN_CACHE = new HashMap(); - - public static final String REGEX = "regex"; - - public static final String REPLACE_WITH = "replaceWith"; - - public static final String SPLIT_BY = "splitBy"; - - public static final String SRC_COL_NAME = "sourceColName"; - - public static final String GROUP_NAMES = "groupNames"; - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Map; - -/** - *

- * <p>
- * A Transformer instance capable of executing functions written in scripting
- * languages as a Transformer instance.
- * </p>
- * <p/>
- * Refer to http://wiki.apache.org/solr/DataImportHandler
- * for more details.
- * <p/>
- * This API is experimental and may change in the future. - * - * @version $Id: ScriptTransformer.java 752586 2009-03-11 19:17:50Z shalin $ - * @since solr 1.3 - */ -public class ScriptTransformer extends Transformer { - private Object engine; - - private Method invokeFunctionMethod; - - private String functionName; - - public Object transformRow(Map row, Context context) { - try { - if (engine == null) - initEngine(context); - if (engine == null) - return row; - return invokeFunctionMethod.invoke(engine, functionName, new Object[]{ - row, context}); - } catch (DataImportHandlerException e) { - throw e; - } catch (InvocationTargetException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Could not invoke method :" - + functionName - + "\n ", e); - } catch (Exception e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Error invoking script for entity " - + context.getEntityAttribute("name"), e); - } - } - - private void initEngine(Context context) { - try { - String scriptText = context.getScript(); - String scriptLang = context.getScriptLanguage(); - Object scriptEngineMgr = Class - .forName("javax.script.ScriptEngineManager").newInstance(); - // create a Script engine - Method getEngineMethod = scriptEngineMgr.getClass().getMethod( - "getEngineByName", String.class); - engine = getEngineMethod.invoke(scriptEngineMgr, scriptLang); - Method evalMethod = engine.getClass().getMethod("eval", String.class); - invokeFunctionMethod = engine.getClass().getMethod("invokeFunction", - String.class, Object[].class); - evalMethod.invoke(engine, scriptText); - } catch (Exception e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - " - - -
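initEngine() above drives javax.script entirely through reflection so that the class still loads on a Java 5 runtime. Written directly against the API, the same steps might look like the sketch below; the class name, function body, and row contents are invented, and a JavaScript engine is bundled only with some JDKs, so treat this as a sketch under those assumptions rather than a guaranteed-runnable example:

import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import java.util.HashMap;
import java.util.Map;

public class ScriptInvokeDemo {
  public static void main(String[] args) throws Exception {
    // Same steps as initEngine(), minus the reflection: look up an engine,
    // eval the script once, then invoke the named function per row.
    ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");
    if (engine == null) {
      System.err.println("No JavaScript engine available on this JDK");
      return;
    }
    engine.eval("function f1(row) { row.put('greeting', 'hello'); return row; }");
    Map<String, Object> row = new HashMap<String, Object>();
    row.put("id", "1");
    Object result = ((Invocable) engine).invokeFunction("f1", row);
    System.out.println(result); // the row map, now with a 'greeting' field
  }
}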

[Garbled JSP residue: this hunk deleted the "DataImportHandler Development Console" page.
Recoverable content: the page read a mandatory "handler" request parameter (with a
"change handler" link), and offered Verbose / Commit / Clean / Start Row / No. of Rows
controls plus a "data config xml" textarea for submitting debug requests to that handler.]
- -Return to Admin Page - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/MockInitialContextFactory.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/MockInitialContextFactory.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/MockInitialContextFactory.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/MockInitialContextFactory.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.HashMap; -import java.util.Hashtable; -import java.util.Map; - -import javax.naming.NamingException; -import javax.naming.spi.InitialContextFactory; - -import org.easymock.EasyMock; -import org.easymock.IAnswer; -import org.easymock.IMocksControl; - -public class MockInitialContextFactory implements InitialContextFactory { - private static final Map objects = new HashMap(); - private final IMocksControl mockControl; - private final javax.naming.Context context; - - public MockInitialContextFactory() { - mockControl = EasyMock.createStrictControl(); - context = mockControl.createMock(javax.naming.Context.class); - - try { - EasyMock.expect(context.lookup((String) EasyMock.anyObject())).andAnswer( - new IAnswer() { - public Object answer() throws Throwable { - return objects.get(EasyMock.getCurrentArguments()[0]); - } - }).anyTimes(); - - } catch (NamingException e) { - throw new RuntimeException(e); - } - - mockControl.replay(); - } - - @SuppressWarnings("unchecked") - public javax.naming.Context getInitialContext(Hashtable env) { - return context; - } - - public static void bind(String name, Object obj) { - objects.put(name, obj); - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,263 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - *

- * <p>
- * Test for CachedSqlEntityProcessor
- * </p>
- * - * @version $Id: TestCachedSqlEntityProcessor.java 766608 2009-04-20 07:36:55Z shalin $ - * @since solr 1.3 - */ -public class TestCachedSqlEntityProcessor { - - @Test - public void withoutWhereClause() { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap("column", "id")); - fields.add(AbstractDataImportHandlerTest.createMap("column", "desc")); - String q = "select * from x where id=${x.id}"; - Map entityAttrs = AbstractDataImportHandlerTest.createMap( - "query", q); - MockDataSource ds = new MockDataSource(); - VariableResolverImpl vr = new VariableResolverImpl(); - - vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1)); - Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs); - List> rows = new ArrayList>(); - rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one")); - rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", - "another one")); - MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator()); - EntityProcessor csep = new EntityProcessorWrapper( new CachedSqlEntityProcessor(), null); - csep.init(context); - rows = new ArrayList>(); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - } - Assert.assertEquals(2, rows.size()); - ds.close(); - csep.init(context); - rows = new ArrayList>(); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - } - Assert.assertEquals(2, rows.size()); - Assert.assertEquals(2, rows.get(0).size()); - Assert.assertEquals(2, rows.get(1).size()); - } - - @Test - public void withoutWhereClauseWithTransformers() { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap("column", "id")); - fields.add(AbstractDataImportHandlerTest.createMap("column", "desc")); - String q = "select * from x where id=${x.id}"; - Map entityAttrs = AbstractDataImportHandlerTest.createMap( - "query", q, "transformer", UppercaseTransformer.class.getName()); - MockDataSource ds = new MockDataSource(); - VariableResolverImpl vr = new VariableResolverImpl(); - - vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1)); - Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs); - List> rows = new ArrayList>(); - rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one")); - rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", - "another one")); - MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator()); - EntityProcessor csep = new EntityProcessorWrapper( new CachedSqlEntityProcessor(), null); - csep.init(context); - rows = new ArrayList>(); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - } - Assert.assertEquals(2, rows.size()); - ds.close(); - csep.init(context); - rows = new ArrayList>(); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - Assert.assertEquals(r.get("desc").toString().toUpperCase(), r.get("desc")); - } - Assert.assertEquals(2, rows.size()); - Assert.assertEquals(2, rows.get(0).size()); - Assert.assertEquals(2, rows.get(1).size()); - } - - @Test - public void withoutWhereClauseWithMultiRowTransformer() { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap("column", "id")); - fields.add(AbstractDataImportHandlerTest.createMap("column", "desc")); - String q = "select * from x where id=${x.id}"; - Map entityAttrs = 
AbstractDataImportHandlerTest.createMap( - "query", q, "transformer", DoubleTransformer.class.getName()); - MockDataSource ds = new MockDataSource(); - VariableResolverImpl vr = new VariableResolverImpl(); - - vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1)); - Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs); - List> rows = new ArrayList>(); - rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one")); - rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", - "another one")); - MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator()); - EntityProcessor csep = new EntityProcessorWrapper( new CachedSqlEntityProcessor(), null); - csep.init(context); - rows = new ArrayList>(); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - } - Assert.assertEquals(4, rows.size()); - ds.close(); - csep.init(context); - rows = new ArrayList>(); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - } - Assert.assertEquals(4, rows.size()); - Assert.assertEquals(2, rows.get(0).size()); - Assert.assertEquals(2, rows.get(1).size()); - } - - public static class DoubleTransformer extends Transformer { - - public Object transformRow(Map row, Context context) { - List> rows = new ArrayList>(); - rows.add(row); - rows.add(row); - - return rows; - } - } - - public static class UppercaseTransformer extends Transformer { - - public Object transformRow(Map row, Context context) { - for (Map.Entry entry : row.entrySet()) { - Object val = entry.getValue(); - if (val instanceof String) { - String s = (String) val; - entry.setValue(s.toUpperCase()); - } - } - return row; - } - } - - @Test - public void withWhereClause() { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap("column", "id")); - fields.add(AbstractDataImportHandlerTest.createMap("column", "desc")); - String q = "select * from x"; - Map entityAttrs = AbstractDataImportHandlerTest.createMap( - "query", q, EntityProcessorBase.CACHE_KEY,"id", EntityProcessorBase.CACHE_LOOKUP ,"x.id"); - MockDataSource ds = new MockDataSource(); - VariableResolverImpl vr = new VariableResolverImpl(); - Map xNamespace = AbstractDataImportHandlerTest.createMap("id", 0); - vr.addNamespace("x", xNamespace); - Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs); - doWhereTest(q, context, ds, xNamespace); - } - - @Test - public void withKeyAndLookup() { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap("column", "id")); - fields.add(AbstractDataImportHandlerTest.createMap("column", "desc")); - String q = "select * from x"; - Map entityAttrs = AbstractDataImportHandlerTest.createMap("query", q, "where", "id=x.id"); - MockDataSource ds = new MockDataSource(); - VariableResolverImpl vr = new VariableResolverImpl(); - Map xNamespace = AbstractDataImportHandlerTest.createMap("id", 0); - vr.addNamespace("x", xNamespace); - Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, Context.FULL_DUMP, fields, entityAttrs); - doWhereTest(q, context, ds, xNamespace); - } - - private void doWhereTest(String q, Context context, MockDataSource ds, Map xNamespace) { - List> rows = new ArrayList>(); - rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one")); - rows.add(AbstractDataImportHandlerTest.createMap("id", 2, "desc", "two")); - 
rows.add(AbstractDataImportHandlerTest.createMap("id", 2, "desc", - "another two")); - rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc", "three")); - rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc", "another three")); - rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc", "another another three")); - MockDataSource.setIterator(q, rows.iterator()); - EntityProcessor csep = new EntityProcessorWrapper(new CachedSqlEntityProcessor(), null); - csep.init(context); - rows = new ArrayList>(); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - } - Assert.assertEquals(0, rows.size()); - ds.close(); - - csep.init(context); - rows = new ArrayList>(); - xNamespace.put("id", 2); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - } - Assert.assertEquals(2, rows.size()); - - csep.init(context); - rows = new ArrayList>(); - xNamespace.put("id", 3); - while (true) { - Map r = csep.nextRow(); - if (r == null) - break; - rows.add(r); - } - Assert.assertEquals(3, rows.size()); - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestClobTransformer.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestClobTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestClobTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestClobTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import junit.framework.Assert; -import org.junit.Test; - -import java.io.StringReader; -import java.lang.reflect.InvocationHandler; -import java.lang.reflect.Method; -import java.lang.reflect.Proxy; -import java.sql.Clob; -import java.util.*; - -/** - * Test for ClobTransformer - * - * @version $Id: TestClobTransformer.java 762174 2009-04-05 22:18:50Z shalin $ - * @see org.apache.solr.handler.dataimport.ClobTransformer - * @since solr 1.4 - */ -public class TestClobTransformer { - @Test - public void simple() throws Exception { - List> flds = new ArrayList>(); - Map f = new HashMap(); - // - f.put(DataImporter.COLUMN, "dsc"); - f.put(ClobTransformer.CLOB, "true"); - f.put(DataImporter.NAME, "description"); - flds.add(f); - Context ctx = AbstractDataImportHandlerTest.getContext(null, new VariableResolverImpl(), null, Context.FULL_DUMP, flds, Collections.EMPTY_MAP); - Transformer t = new ClobTransformer(); - Map row = new HashMap(); - Clob clob = (Clob) Proxy.newProxyInstance(this.getClass().getClassLoader(), new Class[]{Clob.class}, new InvocationHandler() { - public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { - if (method.getName().equals("getCharacterStream")) { - return new StringReader("hello!"); - } - return null; - } - }); - - row.put("dsc", clob); - t.transformRow(row, ctx); - Assert.assertEquals("hello!", row.get("dsc")); - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,154 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import junit.framework.TestCase; -import org.apache.commons.io.FileUtils; -import org.apache.solr.client.solrj.embedded.JettySolrRunner; -import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; -import org.apache.solr.client.solrj.request.DirectXmlRequest; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrDocumentList; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.util.AbstractSolrTestCase; - -import java.io.File; - -/** - * Test for ContentStreamDataSource - * - * @version $Id: TestContentStreamDataSource.java 755141 2009-03-17 07:50:09Z shalin $ - * @since solr 1.4 - */ -public class TestContentStreamDataSource extends TestCase { - private static final String CONF_DIR = "." + File.separator + "solr" + File.separator + "conf" + File.separator; - SolrInstance instance = null; - JettySolrRunner jetty; - - - public void setUp() throws Exception { - instance = new SolrInstance("inst", null); - instance.setUp(); - jetty = createJetty(instance); - - } - - public void testSimple() throws Exception { - DirectXmlRequest req = new DirectXmlRequest("/dataimport", xml); - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("command", "full-import"); - params.set("clean", "false"); - req.setParams(params); - String url = "http://localhost:" + jetty.getLocalPort() + "/solr"; - CommonsHttpSolrServer solrServer = new CommonsHttpSolrServer(url); - solrServer.request(req); - ModifiableSolrParams qparams = new ModifiableSolrParams(); - qparams.add("q", "*:*"); - QueryResponse qres = solrServer.query(qparams); - SolrDocumentList results = qres.getResults(); - assertEquals(2, results.getNumFound()); - SolrDocument doc = results.get(0); - assertEquals("1", doc.getFieldValue("id")); - assertEquals("Hello C1", doc.getFieldValue("desc")); - } - - private class SolrInstance extends AbstractSolrTestCase { - String name; - Integer port; - File homeDir; - File confDir; - - /** - * if masterPort is null, this instance is a master -- otherwise this instance is a slave, and assumes the master is - * on localhost at the specified port. 
- */ - public SolrInstance(String name, Integer port) { - this.name = name; - this.port = port; - } - - public String getHomeDir() { - return homeDir.toString(); - } - - @Override - public String getSchemaFile() { - return CONF_DIR + "dataimport-schema.xml"; - } - - public String getConfDir() { - return confDir.toString(); - } - - public String getDataDir() { - return dataDir.toString(); - } - - @Override - public String getSolrConfigFile() { - return CONF_DIR + "contentstream-solrconfig.xml"; - } - - public void setUp() throws Exception { - - String home = System.getProperty("java.io.tmpdir") - + File.separator - + getClass().getName() + "-" + System.currentTimeMillis(); - - - homeDir = new File(home + "inst"); - dataDir = new File(homeDir, "data"); - confDir = new File(homeDir, "conf"); - - homeDir.mkdirs(); - dataDir.mkdirs(); - confDir.mkdirs(); - - File f = new File(confDir, "solrconfig.xml"); - FileUtils.copyFile(new File(getSolrConfigFile()), f); - f = new File(confDir, "schema.xml"); - - FileUtils.copyFile(new File(getSchemaFile()), f); - f = new File(confDir, "data-config.xml"); - FileUtils.copyFile(new File(CONF_DIR + "dataconfig-contentstream.xml"), f); - } - - public void tearDown() throws Exception { - super.tearDown(); - AbstractSolrTestCase.recurseDelete(homeDir); - } - } - - private JettySolrRunner createJetty(SolrInstance instance) throws Exception { - System.setProperty("solr.solr.home", instance.getHomeDir()); - System.setProperty("solr.data.dir", instance.getDataDir()); - JettySolrRunner jetty = new JettySolrRunner("/solr", 0); - jetty.start(); - return jetty; - } - - static String xml = "\n" - + "\n" - + " 1\n" - + " Hello C1\n" - + "\n" - + "\n" - + " 2\n" - + " Hello C2\n" - + "\n" + ""; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,91 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import junit.framework.Assert; -import org.junit.Test; -import org.w3c.dom.Document; - -import javax.xml.parsers.DocumentBuilderFactory; -import java.io.ByteArrayInputStream; -import java.util.ArrayList; -import java.util.List; - -/** - *

- * <p>
- * Test for DataConfig
- * </p>
- * - * @version $Id: TestDataConfig.java 730055 2008-12-30 06:42:48Z shalin $ - * @since solr 1.3 - */ -public class TestDataConfig extends AbstractDataImportHandlerTest { - - @Override - public void setUp() throws Exception { - super.setUp(); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - } - - @Override - public String getSchemaFile() { - return "dataimport-schema.xml"; - } - - @Override - public String getSolrConfigFile() { - return "dataimport-nodatasource-solrconfig.xml"; - } - - @Test - @SuppressWarnings("unchecked") - public void testDataConfigWithDataSource() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(loadDataConfig("data-config-with-datasource.xml")); - - assertQ(req("id:1"), "//*[@numFound='1']"); - } - - @Test - public void basic() throws Exception { - javax.xml.parsers.DocumentBuilder builder = DocumentBuilderFactory - .newInstance().newDocumentBuilder(); - Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes())); - - DataConfig dc = new DataConfig(); - dc.readFromXml(doc.getDocumentElement()); - Assert.assertEquals("atrimlisting", dc.document.entities.get(0).name); - } - - private static final String xml = "\n" - + "\t\n" - + "\t\t '${indexer.last_index_time}'\">\n" - + - - "\t\t\n" + - - "\t\n" + ""; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDateFormatTransformer.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDateFormatTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDateFormatTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDateFormatTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; - -import java.text.SimpleDateFormat; -import java.util.*; - -/** - *

- * <p>
- * Test for DateFormatTransformer
- * </p>
- * - * @version $Id: TestDateFormatTransformer.java 765499 2009-04-16 08:01:10Z shalin $ - * @since solr 1.3 - */ -public class TestDateFormatTransformer { - - @Test - @SuppressWarnings("unchecked") - public void testTransformRow_SingleRow() throws Exception { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN, - "lastModified")); - fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN, - "dateAdded", RegexTransformer.SRC_COL_NAME, "lastModified", - DateFormatTransformer.DATE_TIME_FMT, "MM/dd/yyyy")); - - SimpleDateFormat format = new SimpleDateFormat("MM/dd/yyyy"); - Date now = format.parse(format.format(new Date())); - - Map row = AbstractDataImportHandlerTest.createMap("lastModified", format - .format(now)); - - VariableResolverImpl resolver = new VariableResolverImpl(); - resolver.addNamespace("e", row); - - Context context = AbstractDataImportHandlerTest.getContext(null, resolver, - null, Context.FULL_DUMP, fields, null); - new DateFormatTransformer().transformRow(row, context); - Assert.assertEquals(now, row.get("dateAdded")); - } - - @Test - @SuppressWarnings("unchecked") - public void testTransformRow_MultipleRows() throws Exception { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN, - "lastModified")); - fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN, - "dateAdded", RegexTransformer.SRC_COL_NAME, "lastModified", - DateFormatTransformer.DATE_TIME_FMT, "MM/dd/yyyy hh:mm:ss.SSS")); - - SimpleDateFormat format = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss.SSS"); - Date now1 = format.parse(format.format(new Date())); - Date now2 = format.parse(format.format(new Date())); - - Map row = new HashMap(); - List list = new ArrayList(); - list.add(format.format(now1)); - list.add(format.format(now2)); - row.put("lastModified", list); - - VariableResolverImpl resolver = new VariableResolverImpl(); - resolver.addNamespace("e", row); - - Context context = AbstractDataImportHandlerTest.getContext(null, resolver, - null, Context.FULL_DUMP, fields, null); - new DateFormatTransformer().transformRow(row, context); - List output = new ArrayList(); - output.add(now1); - output.add(now2); - Assert.assertEquals(output, row.get("dateAdded")); - } - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,361 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.Assert; -import org.apache.solr.request.LocalSolrQueryRequest; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Date; -import java.io.File; - -/** - *
<p/>
- * Test for DocBuilder using the test harness
- * <p/>
- * - * @version $Id: TestDocBuilder2.java 820237 2009-09-30 10:44:10Z shalin $ - * @since solr 1.3 - */ -public class TestDocBuilder2 extends AbstractDataImportHandlerTest { - - @Before - public void setUp() throws Exception { - super.setUp(); - } - - @After - public void tearDown() throws Exception { - super.tearDown(); - } - - @Override - public String getSchemaFile() { - return "dataimport-schema.xml"; - } - - @Override - public String getSolrConfigFile() { - return "dataimport-solrconfig.xml"; - } - - @Test - @SuppressWarnings("unchecked") - public void testSingleEntity() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(loadDataConfig("single-entity-data-config.xml")); - - assertQ(req("id:1"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testSingleEntity_CaseInsensitive() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desC", "one")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(dataConfigWithCaseInsensitiveFields); - - assertQ(req("id:1"), "//*[@numFound='1']"); - assertTrue("Start event listener was not called", StartEventListener.executed); - assertTrue("End event listener was not called", EndEventListener.executed); - } - - @Test - @SuppressWarnings("unchecked") - public void testDynamicFields() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(dataConfigWithDynamicTransformer); - - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("dynamic_s:test"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testRequestParamsAsVariable() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "101", "desc", "ApacheSolr")); - MockDataSource.setIterator("select * from books where category='search'", rows.iterator()); - - LocalSolrQueryRequest request = lrf.makeRequest("command", "full-import", - "debug", "on", "clean", "true", "commit", "true", - "category", "search", - "dataConfig", requestParamAsVariable); - h.query("/dataimport", request); - assertQ(req("desc:ApacheSolr"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testRequestParamsAsFieldName() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("mypk", "101", "text", "ApacheSolr")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - LocalSolrQueryRequest request = lrf.makeRequest("command", "full-import", - "debug", "on", "clean", "true", "commit", "true", - "mypk", "id", "text", "desc", - "dataConfig", dataConfigWithTemplatizedFieldNames); - h.query("/dataimport", request); - assertQ(req("id:101"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testContext() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(loadDataConfig("data-config-with-transformer.xml")); - } - - @Test - @SuppressWarnings("unchecked") - public void testSkipDoc() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - rows.add(createMap("id", "2", "desc", "two", "$skipDoc", "true")); - MockDataSource.setIterator("select 
* from x", rows.iterator()); - - super.runFullImport(dataConfigWithDynamicTransformer); - - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='0']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testSkipRow() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - rows.add(createMap("id", "2", "desc", "two", "$skipRow", "true")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(dataConfigWithDynamicTransformer); - - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='0']"); - - MockDataSource.clearCache(); - - rows = new ArrayList(); - rows.add(createMap("id", "3", "desc", "one")); - rows.add(createMap("id", "4", "desc", "two")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - rows = new ArrayList(); - rows.add(createMap("name_s", "abcd")); - MockDataSource.setIterator("3", rows.iterator()); - - rows = new ArrayList(); - rows.add(createMap("name_s", "xyz", "$skipRow", "true")); - MockDataSource.setIterator("4", rows.iterator()); - - super.runFullImport(dataConfigWithTwoEntities); - assertQ(req("id:3"), "//*[@numFound='1']"); - assertQ(req("id:4"), "//*[@numFound='1']"); - assertQ(req("name_s:abcd"), "//*[@numFound='1']"); - assertQ(req("name_s:xyz"), "//*[@numFound='0']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testStopTransform() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - rows.add(createMap("id", "2", "desc", "two", "$stopTransform", "true")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(dataConfigForSkipTransform); - - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='1']"); - assertQ(req("name_s:xyz"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testDeleteDocs() throws Exception { - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - rows.add(createMap("id", "2", "desc", "two")); - rows.add(createMap("id", "3", "desc", "two", "$deleteDocById", "2")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(dataConfigForSkipTransform); - - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='0']"); - assertQ(req("id:3"), "//*[@numFound='1']"); - - MockDataSource.clearCache(); - rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - rows.add(createMap("id", "2", "desc", "one")); - rows.add(createMap("id", "3", "desc", "two", "$deleteDocByQuery", "desc:one")); - MockDataSource.setIterator("select * from x", rows.iterator()); - - super.runFullImport(dataConfigForSkipTransform); - - assertQ(req("id:1"), "//*[@numFound='0']"); - assertQ(req("id:2"), "//*[@numFound='0']"); - assertQ(req("id:3"), "//*[@numFound='1']"); - } - - @Test - public void testFileListEntityProcessor_lastIndexTime() throws Exception { - long time = System.currentTimeMillis(); - File tmpdir = new File("." 
+ time); - tmpdir.mkdir(); - tmpdir.deleteOnExit(); - - Map params = createMap("baseDir", tmpdir.getAbsolutePath()); - - TestFileListEntityProcessor.createFile(tmpdir, "a.xml", "a.xml".getBytes(), true); - TestFileListEntityProcessor.createFile(tmpdir, "b.xml", "b.xml".getBytes(), true); - TestFileListEntityProcessor.createFile(tmpdir, "c.props", "c.props".getBytes(), true); - super.runFullImport(dataConfigFileList, params); - assertQ(req("*:*"), "//*[@numFound='3']"); - - // Add a new file after a full index is done - TestFileListEntityProcessor.createFile(tmpdir, "t.xml", "t.xml".getBytes(), false); - super.runFullImport(dataConfigFileList, params); - // we should find only 1 because by default clean=true is passed - // and this particular import should find only one file t.xml - assertQ(req("*:*"), "//*[@numFound='1']"); - } - - public static class MockTransformer extends Transformer { - public Object transformRow(Map row, Context context) { - Assert.assertTrue("Context gave incorrect data source", context.getDataSource("mockDs") instanceof MockDataSource2); - return row; - } - } - - public static class AddDynamicFieldTransformer extends Transformer { - public Object transformRow(Map row, Context context) { - // Add a dynamic field - row.put("dynamic_s", "test"); - return row; - } - } - - public static class MockDataSource2 extends MockDataSource { - - } - - public static class StartEventListener implements EventListener { - public static boolean executed = false; - - public void onEvent(Context ctx) { - executed = true; - } - } - - public static class EndEventListener implements EventListener { - public static boolean executed = false; - - public void onEvent(Context ctx) { - executed = true; - } - } - - private final String requestParamAsVariable = "\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - private final String dataConfigWithDynamicTransformer = "\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - private final String dataConfigForSkipTransform = "\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - private final String dataConfigWithTwoEntities = "\n" + - " \n" + - " " + - " \n" + - " \n" + - " " + - " " + - " " + - " \n" + - " \n" + - ""; - - private final String dataConfigWithCaseInsensitiveFields = "\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - private final String dataConfigWithTemplatizedFieldNames = "\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - private final String dataConfigFileList = "\n" + - "\t\n" + - "\t\t\n" + - "\t\t\t\n" + - "\t\t\n" + - "\t\n" + - ""; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,257 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
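The skip/delete tests above exercise DIH's special row flags ($skipDoc, $skipRow, $stopTransform, $deleteDocById, $deleteDocByQuery) by planting them in mock rows. A sketch of how a custom Transformer could set such a flag itself, assuming the Transformer and Context types visible elsewhere in this diff; the class name and the emptiness rule are invented for illustration:

    package org.apache.solr.handler.dataimport;

    import java.util.Map;

    public class SkipEmptyDescTransformer extends Transformer {
        public Object transformRow(Map<String, Object> row, Context context) {
            Object desc = row.get("desc");
            if (desc == null || desc.toString().length() == 0) {
                row.put("$skipDoc", "true"); // DocBuilder drops this document
            }
            return row;
        }
    }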
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.SolrInputDocument; -import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap; -import org.junit.Assert; -import org.junit.Test; - -import java.util.*; - -/** - *
<p/>
- * Test for DocBuilder
- * <p/>
- * - * @version $Id: TestDocBuilder.java 826074 2009-10-16 20:34:16Z shalin $ - * @since solr 1.3 - */ -public class TestDocBuilder { - - @Test - public void loadClass() throws Exception { - Class clz = DocBuilder.loadClass("RegexTransformer", null); - Assert.assertNotNull(clz); - } - - @Test - public void singleEntityNoRows() { - try { - DataImporter di = new DataImporter(); - di.loadAndInit(dc_singleEntity); - DataConfig cfg = di.getConfig(); - DataConfig.Entity ent = cfg.document.entities.get(0); - MockDataSource.setIterator("select * from x", new ArrayList().iterator()); - ent.dataSrc = new MockDataSource(); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled); - Assert.assertEquals(Boolean.TRUE, swi.commitCalled); - Assert.assertEquals(0, swi.docs.size()); - Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount - .get()); - Assert - .assertEquals(0, di.getDocBuilder().importStatistics.docCount.get()); - Assert.assertEquals(0, di.getDocBuilder().importStatistics.rowsCount - .get()); - } finally { - MockDataSource.clearCache(); - } - } - - @Test - public void testDeltaImportNoRows_MustNotCommit() { - try { - DataImporter di = new DataImporter(); - di.loadAndInit(dc_deltaConfig); - DataConfig cfg = di.getConfig(); - DataConfig.Entity ent = cfg.document.entities.get(0); - MockDataSource.setIterator("select * from x", new ArrayList().iterator()); - MockDataSource.setIterator("select id from x", new ArrayList().iterator()); - ent.dataSrc = new MockDataSource(); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(createMap("command", "delta-import")); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals(Boolean.FALSE, swi.deleteAllCalled); - Assert.assertEquals(Boolean.FALSE, swi.commitCalled); - Assert.assertEquals(0, swi.docs.size()); - Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount.get()); - Assert.assertEquals(0, di.getDocBuilder().importStatistics.docCount.get()); - Assert.assertEquals(0, di.getDocBuilder().importStatistics.rowsCount.get()); - } finally { - MockDataSource.clearCache(); - } - } - - @Test - public void singleEntityOneRow() { - try { - DataImporter di = new DataImporter(); - di.loadAndInit(dc_singleEntity); - DataConfig cfg = di.getConfig(); - DataConfig.Entity ent = cfg.document.entities.get(0); - List l = new ArrayList(); - l.add(createMap("id", 1, "desc", "one")); - MockDataSource.setIterator("select * from x", l.iterator()); - ent.dataSrc = new MockDataSource(); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled); - Assert.assertEquals(Boolean.TRUE, swi.commitCalled); - Assert.assertEquals(1, swi.docs.size()); - Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount - .get()); - Assert - .assertEquals(1, di.getDocBuilder().importStatistics.docCount.get()); - Assert.assertEquals(1, di.getDocBuilder().importStatistics.rowsCount - .get()); - - for (int i = 0; i < l.size(); i++) { - Map map = (Map) l.get(i); - SolrInputDocument doc = swi.docs.get(i); - for (Map.Entry entry : map.entrySet()) { - Assert.assertEquals(entry.getValue(), doc.getFieldValue(entry - 
.getKey())); - } - } - } finally { - MockDataSource.clearCache(); - } - } - - @Test - public void testImportCommand() { - try { - DataImporter di = new DataImporter(); - di.loadAndInit(dc_singleEntity); - DataConfig cfg = di.getConfig(); - DataConfig.Entity ent = cfg.document.entities.get(0); - List l = new ArrayList(); - l.add(createMap("id", 1, "desc", "one")); - MockDataSource.setIterator("select * from x", l.iterator()); - ent.dataSrc = new MockDataSource(); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(createMap("command", "import")); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals(Boolean.FALSE, swi.deleteAllCalled); - Assert.assertEquals(Boolean.TRUE, swi.commitCalled); - Assert.assertEquals(1, swi.docs.size()); - Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount - .get()); - Assert - .assertEquals(1, di.getDocBuilder().importStatistics.docCount.get()); - Assert.assertEquals(1, di.getDocBuilder().importStatistics.rowsCount - .get()); - - for (int i = 0; i < l.size(); i++) { - Map map = (Map) l.get(i); - SolrInputDocument doc = swi.docs.get(i); - for (Map.Entry entry : map.entrySet()) { - Assert.assertEquals(entry.getValue(), doc.getFieldValue(entry - .getKey())); - } - } - } finally { - MockDataSource.clearCache(); - } - } - - @Test - public void singleEntityMultipleRows() { - try { - DataImporter di = new DataImporter(); - di.loadAndInit(dc_singleEntity); - DataConfig cfg = di.getConfig(); - DataConfig.Entity ent = cfg.document.entities.get(0); - ent.isDocRoot = true; - DataImporter.RequestParams rp = new DataImporter.RequestParams(); - rp.command = "full-import"; - List l = new ArrayList(); - l.add(createMap("id", 1, "desc", "one")); - l.add(createMap("id", 2, "desc", "two")); - l.add(createMap("id", 3, "desc", "three")); - - MockDataSource.setIterator("select * from x", l.iterator()); - ent.dataSrc = new MockDataSource(); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled); - Assert.assertEquals(Boolean.TRUE, swi.commitCalled); - Assert.assertEquals(3, swi.docs.size()); - for (int i = 0; i < l.size(); i++) { - Map map = (Map) l.get(i); - SolrInputDocument doc = swi.docs.get(i); - for (Map.Entry entry : map.entrySet()) { - Assert.assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey())); - } - Assert.assertEquals(map.get("desc"), doc.getFieldValue("desc_s")); - } - Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount - .get()); - Assert - .assertEquals(3, di.getDocBuilder().importStatistics.docCount.get()); - Assert.assertEquals(3, di.getDocBuilder().importStatistics.rowsCount - .get()); - } finally { - MockDataSource.clearCache(); - } - } - - static class SolrWriterImpl extends SolrWriter { - List docs = new ArrayList(); - - Boolean deleteAllCalled = Boolean.FALSE; - - Boolean commitCalled = Boolean.FALSE; - - public SolrWriterImpl() { - super(null, "."); - } - - public boolean upload(SolrInputDocument doc) { - return docs.add(doc); - } - - public void log(int event, String name, Object row) { - // Do nothing - } - - public void doDeleteAll() { - deleteAllCalled = Boolean.TRUE; - } - - public void commit(boolean b) { - commitCalled = Boolean.TRUE; - } - } - - public static final String dc_singleEntity = "\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " " + " \n" - + " \n" + ""; - - public static final String dc_deltaConfig = "\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " " + " 
\n" - + " \n" + ""; - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - *
<p/>
- * Test for EntityProcessorBase
- * <p/>
- * - * @version $Id: TestEntityProcessorBase.java 766608 2009-04-20 07:36:55Z shalin $ - * @since solr 1.3 - */ -public class TestEntityProcessorBase { - - @Test - public void multiTransformer() { - List> fields = new ArrayList>(); - Map entity = new HashMap(); - entity.put("transformer", T1.class.getName() + "," + T2.class.getName() - + "," + T3.class.getName()); - fields.add(TestRegexTransformer.getField("A", null, null, null, null)); - fields.add(TestRegexTransformer.getField("B", null, null, null, null)); - - Context context = AbstractDataImportHandlerTest.getContext(null, null, new MockDataSource(), Context.FULL_DUMP, - fields, entity); - Map src = new HashMap(); - src.put("A", "NA"); - src.put("B", "NA"); - EntityProcessorWrapper sep = new EntityProcessorWrapper(new SqlEntityProcessor(), null); - sep.init(context); - Map res = sep.applyTransformer(src); - Assert.assertNotNull(res.get("T1")); - Assert.assertNotNull(res.get("T2")); - Assert.assertNotNull(res.get("T3")); - } - - static class T1 extends Transformer { - - public Object transformRow(Map aRow, Context context) { - aRow.put("T1", "T1 called"); - return aRow; - - } - } - - static class T2 extends Transformer { - - public Object transformRow(Map aRow, Context context) { - aRow.put("T2", "T2 called"); - return aRow; - } - } - - static class T3 { - - public Object transformRow(Map aRow) { - aRow.put("T3", "T3 called"); - return aRow; - } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,175 +0,0 @@ -package org.apache.solr.handler.dataimport; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
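The multiTransformer test above depends on DIH applying the entity's comma-separated transformer list in declaration order, accepting even a plain class whose only contract is a transformRow(Map) method (T3). A sketch of the matching data-config fragment, written as a Java string the way this file's sibling tests embed their configs; the entity and field names are illustrative:

    String entityWithChain =
        "<entity name=\"x\" query=\"select * from x\" "
      + "        transformer=\"T1,T2,T3\">"   // applied left to right
      + "  <field column=\"A\"/>"
      + "  <field column=\"B\"/>"
      + "</entity>";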
- */ - -import java.io.Reader; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -/** - * Tests exception handling during imports in DataImportHandler - * - * @version $Id: TestErrorHandling.java 776449 2009-05-19 20:33:20Z gsingers $ - * @since solr 1.4 - */ -public class TestErrorHandling extends AbstractDataImportHandlerTest { - - public void testMalformedStreamingXml() throws Exception { - StringDataSource.xml = malformedXml; - super.runFullImport(dataConfigWithStreaming); - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='1']"); - } - - public void testMalformedNonStreamingXml() throws Exception { - StringDataSource.xml = malformedXml; - super.runFullImport(dataConfigWithoutStreaming); - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='1']"); - } - - public void testAbortOnError() throws Exception { - StringDataSource.xml = malformedXml; - super.runFullImport(dataConfigAbortOnError); - assertQ(req("*:*"), "//*[@numFound='0']"); - } - - public void testTransformerErrorContinue() throws Exception { - StringDataSource.xml = wellformedXml; - List> rows = new ArrayList>(); - rows.add(createMap("id", "3", "desc", "exception-transformer")); - MockDataSource.setIterator("select * from foo", rows.iterator()); - super.runFullImport(dataConfigWithTransformer); - assertQ(req("*:*"), "//*[@numFound='3']"); - } - - @Override - public String getSchemaFile() { - return "dataimport-schema.xml"; - } - - @Override - public String getSolrConfigFile() { - return "dataimport-solrconfig.xml"; - } - - @Override - public void setUp() throws Exception { - super.setUp(); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - } - - public static class StringDataSource extends DataSource { - public static String xml = ""; - - public void init(Context context, Properties initProps) { - } - - public Reader getData(String query) { - return new StringReader(xml); - } - - public void close() { - - } - } - - public static class ExceptionTransformer extends Transformer { - public Object transformRow(Map row, Context context) { - throw new RuntimeException("Test exception"); - } - } - - private String dataConfigWithStreaming = "\n" + - " " + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - private String dataConfigWithoutStreaming = "\n" + - " " + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - private String dataConfigAbortOnError = "\n" + - " " + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - private String dataConfigWithTransformer = "\n" + - " " + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " " + - " \n" + - " \n" + - ""; - - private String malformedXml = "\n" + - " \n" + - " 1\n" + - " test1\n" + - " \n" + - " \n" + - " 2\n" + - " test2\n" + - " \n" + - " \n" + - " 3\n" + - " test3\n" + - " \n" + - ""; - - private String wellformedXml = "\n" + - " \n" + - " 1\n" + - " test1\n" + - " \n" + - " \n" + - " 2\n" + - " test2\n" + - " \n" + - " \n" + - " 3\n" + - " test3\n" + - " \n" + - ""; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java 2009-12-15 10:08:27.000000000 +0000 
+++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,145 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.junit.Assert.assertEquals; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; - -import java.net.URLEncoder; -import java.text.SimpleDateFormat; -import java.util.*; - -import junit.framework.Assert; - -/** - *
<p/> Test for EvaluatorBag <p/>
- * - * @version $Id: TestEvaluatorBag.java 820241 2009-09-30 10:55:55Z shalin $ - * @since solr 1.3 - */ -public class TestEvaluatorBag { - private static final String ENCODING = "UTF-8"; - - VariableResolverImpl resolver; - - Map sqlTests; - - Map urlTests; - - @Before - public void setUp() throws Exception { - resolver = new VariableResolverImpl(); - - sqlTests = new HashMap(); - - sqlTests.put("foo\"", "foo\"\""); - sqlTests.put("foo'", "foo''"); - sqlTests.put("foo''", "foo''''"); - sqlTests.put("'foo\"", "''foo\"\""); - sqlTests.put("\"Albert D'souza\"", "\"\"Albert D''souza\"\""); - - urlTests = new HashMap(); - - urlTests.put("*:*", URLEncoder.encode("*:*", ENCODING)); - urlTests.put("price:[* TO 200]", URLEncoder.encode("price:[* TO 200]", - ENCODING)); - urlTests.put("review:\"hybrid sedan\"", URLEncoder.encode( - "review:\"hybrid sedan\"", ENCODING)); - } - - /** - * Test method for {@link EvaluatorBag#getSqlEscapingEvaluator()}. - */ - @Test - public void testGetSqlEscapingEvaluator() { - Evaluator sqlEscaper = EvaluatorBag.getSqlEscapingEvaluator(); - runTests(sqlTests, sqlEscaper); - } - - /** - * Test method for {@link EvaluatorBag#getUrlEvaluator()}. - */ - @Test - public void testGetUrlEvaluator() throws Exception { - Evaluator urlEvaluator = EvaluatorBag.getUrlEvaluator(); - runTests(urlTests, urlEvaluator); - } - - @Test - public void parseParams() { - Map m = new HashMap(); - m.put("b","B"); - VariableResolverImpl vr = new VariableResolverImpl(); - vr.addNamespace("a",m); - List l = EvaluatorBag.parseParams(" 1 , a.b, 'hello!', 'ds,o,u\'za',",vr); - Assert.assertEquals(new Double(1),l.get(0)); - Assert.assertEquals("B",((EvaluatorBag.VariableWrapper)l.get(1)).resolve()); - Assert.assertEquals("hello!",l.get(2)); - Assert.assertEquals("ds,o,u'za",l.get(3)); - } - - @Test - public void testEscapeSolrQueryFunction() { - final VariableResolverImpl resolver = new VariableResolverImpl(); - ContextImpl context = new ContextImpl(null, resolver, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null, null); - resolver.context = context; - Map m= new HashMap(); - m.put("query","c:t"); - resolver.addNamespace("dataimporter.functions", EvaluatorBag - .getFunctionsNamespace(Collections.EMPTY_LIST, null)); - resolver.addNamespace("e",m); - String s = resolver - .replaceTokens("${dataimporter.functions.escapeQueryChars(e.query)}"); - org.junit.Assert.assertEquals("c\\:t", s); - } - - /** - * Test method for {@link EvaluatorBag#getDateFormatEvaluator()}. 
- */ - @Test - public void testGetDateFormatEvaluator() { - Evaluator dateFormatEval = EvaluatorBag.getDateFormatEvaluator(); - resolver.context = new ContextImpl(null, resolver, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null, null); - - long time = System.currentTimeMillis(); - assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm").format(new Date(time - 2*86400*1000)), - dateFormatEval.evaluate("'NOW-2DAYS','yyyy-MM-dd HH:mm'", resolver.context)); - - Map map = new HashMap(); - map.put("key", new Date(time)); - resolver.addNamespace("A", map); - - assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm").format(new Date(time)), - dateFormatEval.evaluate("A.key, 'yyyy-MM-dd HH:mm'", resolver.context)); - } - - private void runTests(Map tests, Evaluator evaluator) { - ContextImpl ctx = new ContextImpl(null, resolver, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null, null); - resolver.context = ctx; - for (Map.Entry entry : tests.entrySet()) { - Map values = new HashMap(); - values.put("key", entry.getKey()); - resolver.addNamespace("A", values); - - String expected = (String) entry.getValue(); - String actual = evaluator.evaluate("A.key", ctx); - assertEquals(expected, actual); - } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFieldReader.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFieldReader.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFieldReader.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFieldReader.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
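The sqlTests map above pins down the contract of EvaluatorBag's SQL-escaping evaluator: embedded single and double quotes are doubled. A standalone restatement of that rule, illustrative rather than the DIH implementation:

    static String escapeSql(String s) {
        // Mirrors the sqlTests expectations: ' becomes '' and " becomes "".
        return s.replace("'", "''").replace("\"", "\"\"");
    }
    // escapeSql("Albert D'souza") yields "Albert D''souza"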
- */ -package org.apache.solr.handler.dataimport; - -import junit.framework.Assert; -import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Test for FieldReaderDataSource - * - * @version $Id: TestFieldReader.java 730055 2008-12-30 06:42:48Z shalin $ - * @see org.apache.solr.handler.dataimport.FieldReaderDataSource - * @since 1.4 - */ -public class TestFieldReader { - - @Test - public void simple() { - DataImporter di = new DataImporter(); - di.loadAndInit(config); - TestDocBuilder.SolrWriterImpl sw = new TestDocBuilder.SolrWriterImpl(); - DataImporter.RequestParams rp = new DataImporter.RequestParams(createMap("command", "full-import")); - List> l = new ArrayList>(); - l.add(createMap("xml", xml)); - MockDataSource.setIterator("select * from a", l.iterator()); - di.runCmd(rp, sw); - Assert.assertEquals(sw.docs.get(0).getFieldValue("y"), "Hello"); - MockDataSource.clearCache(); - } - - String config = "\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - " \n" + - ""; - - String xml = "\n" + - " Hello\n" + - ""; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,201 +0,0 @@ -package org.apache.solr.handler.dataimport; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.junit.Assert; -import org.junit.Test; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.*; - -/** - *
<p/>
- * Test for FileListEntityProcessor
- * <p/>
- * - * @version $Id: TestFileListEntityProcessor.java 820237 2009-09-30 10:44:10Z shalin $ - * @since solr 1.3 - */ -public class TestFileListEntityProcessor { - - @Test - @SuppressWarnings("unchecked") - public void testSimple() throws IOException { - long time = System.currentTimeMillis(); - File tmpdir = new File("." + time); - tmpdir.mkdir(); - tmpdir.deleteOnExit(); - createFile(tmpdir, "a.xml", "a.xml".getBytes(), false); - createFile(tmpdir, "b.xml", "b.xml".getBytes(), false); - createFile(tmpdir, "c.props", "c.props".getBytes(), false); - Map attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, "xml$", - FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath()); - Context c = AbstractDataImportHandlerTest.getContext(null, - new VariableResolverImpl(), null, Context.FULL_DUMP, Collections.EMPTY_LIST, attrs); - FileListEntityProcessor fileListEntityProcessor = new FileListEntityProcessor(); - fileListEntityProcessor.init(c); - List fList = new ArrayList(); - while (true) { - Map f = fileListEntityProcessor.nextRow(); - if (f == null) - break; - fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE)); - } - Assert.assertEquals(2, fList.size()); - } - - @Test - public void testBiggerSmallerFiles() throws IOException { - long time = System.currentTimeMillis(); - File tmpdir = new File("." + time); - tmpdir.mkdir(); - tmpdir.deleteOnExit(); - long minLength = Long.MAX_VALUE; - String smallestFile = ""; - byte[] content = "abcdefgij".getBytes("UTF-8"); - createFile(tmpdir, "a.xml", content, false); - if (minLength > content.length) { - minLength = content.length; - smallestFile = "a.xml"; - } - content = "abcdefgij".getBytes("UTF-8"); - createFile(tmpdir, "b.xml", content, false); - if (minLength > content.length) { - minLength = content.length; - smallestFile = "b.xml"; - } - content = "abc".getBytes("UTF-8"); - createFile(tmpdir, "c.props", content, false); - if (minLength > content.length) { - minLength = content.length; - smallestFile = "c.props"; - } - Map attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, ".*", - FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), - FileListEntityProcessor.BIGGER_THAN, String.valueOf(minLength)); - List fList = getFiles(null, attrs); - Assert.assertEquals(2, fList.size()); - Set l = new HashSet(); - l.add(new File(tmpdir, "a.xml").getAbsolutePath()); - l.add(new File(tmpdir, "b.xml").getAbsolutePath()); - Assert.assertEquals(l, new HashSet(fList)); - attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, ".*", - FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), - FileListEntityProcessor.SMALLER_THAN, String.valueOf(minLength+1)); - fList = getFiles(null, attrs); - l.clear(); - l.add(new File(tmpdir, smallestFile).getAbsolutePath()); - Assert.assertEquals(l, new HashSet(fList)); - attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, ".*", - FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), - FileListEntityProcessor.SMALLER_THAN, "${a.x}"); - VariableResolverImpl resolver = new VariableResolverImpl(); - resolver.addNamespace("a", AbstractDataImportHandlerTest.createMap("x", "4")); - fList = getFiles(resolver, attrs); - Assert.assertEquals(l, new HashSet(fList)); - } - - @SuppressWarnings("unchecked") - static List getFiles(VariableResolverImpl resolver, Map attrs) { - Context c = AbstractDataImportHandlerTest.getContext(null, - resolver, null, Context.FULL_DUMP, 
Collections.EMPTY_LIST, attrs); - FileListEntityProcessor fileListEntityProcessor = new FileListEntityProcessor(); - fileListEntityProcessor.init(c); - List fList = new ArrayList(); - while (true) { - Map f = fileListEntityProcessor.nextRow(); - if (f == null) - break; - fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE)); - } - return fList; - } - - @Test - public void testNTOT() throws IOException { - long time = System.currentTimeMillis(); - File tmpdir = new File("." + time); - tmpdir.mkdir(); - tmpdir.deleteOnExit(); - createFile(tmpdir, "a.xml", "a.xml".getBytes(), true); - createFile(tmpdir, "b.xml", "b.xml".getBytes(), true); - createFile(tmpdir, "c.props", "c.props".getBytes(), true); - Map attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, "xml$", - FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), - FileListEntityProcessor.OLDER_THAN, "'NOW'"); - List fList = getFiles(null, attrs); - Assert.assertEquals(2, fList.size()); - attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, ".xml$", - FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), - FileListEntityProcessor.NEWER_THAN, "'NOW-2HOURS'"); - fList = getFiles(null, attrs); - Assert.assertEquals(2, fList.size()); - - // Use a variable for newerThan - attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, ".xml$", - FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), - FileListEntityProcessor.NEWER_THAN, "${a.x}"); - VariableResolverImpl resolver = new VariableResolverImpl(); - String lastMod = DataImporter.DATE_TIME_FORMAT.get().format(new Date(System.currentTimeMillis() - 50000)); - resolver.addNamespace("a", AbstractDataImportHandlerTest.createMap("x", lastMod)); - createFile(tmpdir, "t.xml", "t.xml".getBytes(), false); - fList = getFiles(resolver, attrs); - Assert.assertEquals(1, fList.size()); - Assert.assertEquals("File name must be t.xml", new File(tmpdir, "t.xml").getAbsolutePath(), fList.get(0)); - } - - @Test - public void testRECURSION() throws IOException { - long time = System.currentTimeMillis(); - File tmpdir = new File("." 
+ time); - tmpdir.mkdir(); - tmpdir.deleteOnExit(); - File childdir = new File(tmpdir + "/child" ); - childdir.mkdirs(); - childdir.deleteOnExit(); - createFile(childdir, "a.xml", "a.xml".getBytes(), true); - createFile(childdir, "b.xml", "b.xml".getBytes(), true); - createFile(childdir, "c.props", "c.props".getBytes(), true); - Map attrs = AbstractDataImportHandlerTest.createMap( - FileListEntityProcessor.FILE_NAME, "^.*\\.xml$", - FileListEntityProcessor.BASE_DIR, childdir.getAbsolutePath(), - FileListEntityProcessor.RECURSIVE, "true"); - List fList = getFiles(null, attrs); - Assert.assertEquals(2, fList.size()); - } - - public static File createFile(File tmpdir, String name, byte[] content, - boolean changeModifiedTime) throws IOException { - File file = new File(tmpdir.getAbsolutePath() + File.separator + name); - file.deleteOnExit(); - FileOutputStream f = new FileOutputStream(file); - f.write(content); - f.close(); - if (changeModifiedTime) - file.setLastModified(System.currentTimeMillis() - 3600000); - return file; - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,182 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.sql.Connection; -import java.sql.Driver; -import java.sql.DriverManager; -import java.util.*; - -import javax.sql.DataSource; - -import org.easymock.EasyMock; -import org.easymock.IMocksControl; -import org.junit.*; - -/** - *
<p/>
- * Test for JdbcDataSource
- * <p/>
- * <p/>
- * <p/>
- * Note: The tests are ignored for the lack of DB support for testing
- * <p/>
- * - * @version $Id: TestJdbcDataSource.java 823429 2009-10-09 07:11:34Z noble $ - * @since solr 1.3 - */ -public class TestJdbcDataSource { - Driver driver; - DataSource dataSource; - Connection connection; - IMocksControl mockControl; - JdbcDataSource jdbcDataSource = new JdbcDataSource(); - List> fields = new ArrayList>(); - - Context context = AbstractDataImportHandlerTest.getContext(null, null, - jdbcDataSource, Context.FULL_DUMP, fields, null); - - Properties props = new Properties(); - - String sysProp = System.getProperty("java.naming.factory.initial"); - - @Before - public void SetUp() throws ClassNotFoundException { - System.setProperty("java.naming.factory.initial", - MockInitialContextFactory.class.getName()); - - mockControl = EasyMock.createStrictControl(); - driver = mockControl.createMock(Driver.class); - dataSource = mockControl.createMock(DataSource.class); - connection = mockControl.createMock(Connection.class); - } - - @After - public void tearDown() { - if (sysProp == null) { - System.getProperties().remove("java.naming.factory.initial"); - } else { - System.setProperty("java.naming.factory.initial", sysProp); - } - } - - @Test - public void retrieveFromJndi() throws Exception { - MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource); - - props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB"); - - EasyMock.expect(dataSource.getConnection()).andReturn(connection); - connection.setAutoCommit(false); -// connection.setHoldability(1); - - mockControl.replay(); - - Connection conn = jdbcDataSource.createConnectionFactory(context, props) - .call(); - - mockControl.verify(); - - Assert.assertSame("connection", conn, connection); - } - - @Test - public void retrieveFromJndiWithCredentials() throws Exception { - MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource); - - props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB"); - props.put("user", "Fred"); - props.put("password", "4r3d"); - props.put("holdability", "HOLD_CURSORS_OVER_COMMIT"); - - EasyMock.expect(dataSource.getConnection("Fred", "4r3d")).andReturn( - connection); - connection.setAutoCommit(false); - connection.setHoldability(1); - - mockControl.replay(); - - Connection conn = jdbcDataSource.createConnectionFactory(context, props) - .call(); - - mockControl.verify(); - - Assert.assertSame("connection", conn, connection); - } - - @Test - public void retrieveFromDriverManager() throws Exception { - DriverManager.registerDriver(driver); - - EasyMock.expect( - driver.connect((String) EasyMock.notNull(), (Properties) EasyMock - .notNull())).andReturn(connection); - connection.setAutoCommit(false); - connection.setHoldability(1); - - props.put(JdbcDataSource.DRIVER, driver.getClass().getName()); - props.put(JdbcDataSource.URL, "jdbc:fakedb"); - props.put("holdability", "HOLD_CURSORS_OVER_COMMIT"); - mockControl.replay(); - - Connection conn = jdbcDataSource.createConnectionFactory(context, props) - .call(); - - mockControl.verify(); - - Assert.assertSame("connection", conn, connection); - } - - @Test - @Ignore - public void basic() throws Exception { - JdbcDataSource dataSource = new JdbcDataSource(); - Properties p = new Properties(); - p.put("driver", "com.mysql.jdbc.Driver"); - p.put("url", "jdbc:mysql://localhost/autos"); - p.put("user", "root"); - p.put("password", ""); - - List> flds = new ArrayList>(); - Map f = new HashMap(); - f.put("column", "trim_id"); - f.put("type", "long"); - flds.add(f); - f = new HashMap(); - f.put("column", "msrp"); - f.put("type", 
"float"); - flds.add(f); - - Context c = AbstractDataImportHandlerTest.getContext(null, null, - dataSource, Context.FULL_DUMP, flds, null); - dataSource.init(c, p); - Iterator> i = dataSource - .getData("select make,model,year,msrp,trim_id from atrimlisting where make='Acura'"); - int count = 0; - Object msrp = null; - Object trim_id = null; - while (i.hasNext()) { - Map map = i.next(); - msrp = map.get("msrp"); - trim_id = map.get("trim_id"); - count++; - } - Assert.assertEquals(5, count); - Assert.assertEquals(Float.class, msrp.getClass()); - Assert.assertEquals(Long.class, trim_id.getClass()); - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,250 +0,0 @@ -package org.apache.solr.handler.dataimport; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; -import java.util.*; - - -/** - *
<p/> Test for TestLineEntityProcessor <p/>
- * - * @version $Id: TestLineEntityProcessor.java 766638 2009-04-20 10:12:50Z shalin $ - * @since solr 1.4 - */ -public class TestLineEntityProcessor { - - @Test - /************************************************************************/ - public void simple() throws IOException { - - /* we want to create the equiv of :- - * - */ - - Map attrs = AbstractDataImportHandlerTest.createMap( - LineEntityProcessor.URL, "dummy.lis", - LineEntityProcessor.ACCEPT_LINE_REGEX, null, - LineEntityProcessor.SKIP_LINE_REGEX, null - ); - - Context c = AbstractDataImportHandlerTest.getContext( - null, //parentEntity - new VariableResolverImpl(), //resolver - getDataSource(filecontents), //parentDataSource - Context.FULL_DUMP, //currProcess - Collections.EMPTY_LIST, //entityFields - attrs //entityAttrs - ); - LineEntityProcessor ep = new LineEntityProcessor(); - ep.init(c); - - /// call the entity processor to the list of lines - System.out.print("\n"); - List fList = new ArrayList(); - while (true) { - Map f = ep.nextRow(); - if (f == null) break; - fList.add((String) f.get("rawLine")); - System.out.print(" rawLine='" + f.get("rawLine") + "'\n"); - } - Assert.assertEquals(24, fList.size()); - } - - @Test - /************************************************************************/ - public void only_xml_files() throws IOException { - - /* we want to create the equiv of :- - * - */ - Map attrs = AbstractDataImportHandlerTest.createMap( - LineEntityProcessor.URL, "dummy.lis", - LineEntityProcessor.ACCEPT_LINE_REGEX, "xml", - LineEntityProcessor.SKIP_LINE_REGEX, null - ); - - Context c = AbstractDataImportHandlerTest.getContext( - null, //parentEntity - new VariableResolverImpl(), //resolver - getDataSource(filecontents), //parentDataSource - Context.FULL_DUMP, //currProcess - Collections.EMPTY_LIST, //entityFields - attrs //entityAttrs - ); - LineEntityProcessor ep = new LineEntityProcessor(); - ep.init(c); - - /// call the entity processor to the list of lines - List fList = new ArrayList(); - while (true) { - Map f = ep.nextRow(); - if (f == null) break; - fList.add((String) f.get("rawLine")); - } - Assert.assertEquals(5, fList.size()); - } - - @Test - /************************************************************************/ - public void only_xml_files_no_xsd() throws IOException { - /* we want to create the equiv of :- - * - */ - Map attrs = AbstractDataImportHandlerTest.createMap( - LineEntityProcessor.URL, "dummy.lis", - LineEntityProcessor.ACCEPT_LINE_REGEX, "\\.xml", - LineEntityProcessor.SKIP_LINE_REGEX, "\\.xsd" - ); - - Context c = AbstractDataImportHandlerTest.getContext( - null, //parentEntity - new VariableResolverImpl(), //resolver - getDataSource(filecontents), //parentDataSource - Context.FULL_DUMP, //currProcess - Collections.EMPTY_LIST, //entityFields - attrs //entityAttrs - ); - LineEntityProcessor ep = new LineEntityProcessor(); - ep.init(c); - - /// call the entity processor to walk the directory - List fList = new ArrayList(); - while (true) { - Map f = ep.nextRow(); - if (f == null) break; - fList.add((String) f.get("rawLine")); - } - Assert.assertEquals(4, fList.size()); - } - - @Test - /************************************************************************/ - public void no_xsd_files() throws IOException { - /* we want to create the equiv of :- - * - */ - Map attrs = AbstractDataImportHandlerTest.createMap( - LineEntityProcessor.URL, "dummy.lis", - LineEntityProcessor.SKIP_LINE_REGEX, "\\.xsd" - ); - - Context c = AbstractDataImportHandlerTest.getContext( - null, //parentEntity - 
new VariableResolverImpl(), //resolver - getDataSource(filecontents), //parentDataSource - Context.FULL_DUMP, //currProcess - Collections.EMPTY_LIST, //entityFields - attrs //entityAttrs - ); - LineEntityProcessor ep = new LineEntityProcessor(); - ep.init(c); - - /// call the entity processor to walk the directory - List fList = new ArrayList(); - while (true) { - Map f = ep.nextRow(); - if (f == null) break; - fList.add((String) f.get("rawLine")); - } - Assert.assertEquals(18, fList.size()); - } - - /** - * ******************************************************************** - */ - public static Map createField( - String col, // DIH column name - String type, // field type from schema.xml - String srcCol, // DIH transformer attribute 'sourceColName' - String re, // DIH regex attribute 'regex' - String rw, // DIH regex attribute 'replaceWith' - String gn // DIH regex attribute 'groupNames' - ) { - HashMap vals = new HashMap(); - vals.put("column", col); - vals.put("type", type); - vals.put("sourceColName", srcCol); - vals.put("regex", re); - vals.put("replaceWith", rw); - vals.put("groupNames", gn); - return vals; - } - - private DataSource getDataSource(final String xml) { - return new DataSource() { - public void init(Context context, Properties initProps) { - } - - public void close() { - } - - public Reader getData(String query) { - return new StringReader(xml); - } - }; - } - - private static final String filecontents = - "\n" + - "# this is what the output from 'find . -ls; looks like, athough the format\n" + - "# of the time stamp varies depending on the age of the file and your LANG \n" + - "# env setting\n" + - "412577 0 drwxr-xr-x 6 user group 204 1 Apr 10:53 /Volumes/spare/ts\n" + - "412582 0 drwxr-xr-x 13 user group 442 1 Apr 10:18 /Volumes/spare/ts/config\n" + - "412583 24 -rwxr-xr-x 1 user group 8318 1 Apr 11:10 /Volumes/spare/ts/config/dc.xsd\n" + - "412584 32 -rwxr-xr-x 1 user group 12847 1 Apr 11:10 /Volumes/spare/ts/config/dcterms.xsd\n" + - "412585 8 -rwxr-xr-x 1 user group 3156 1 Apr 11:10 /Volumes/spare/ts/config/s-deliver.css\n" + - "412586 192 -rwxr-xr-x 1 user group 97764 1 Apr 11:10 /Volumes/spare/ts/config/s-deliver.xsl\n" + - "412587 224 -rwxr-xr-x 1 user group 112700 1 Apr 11:10 /Volumes/spare/ts/config/sml-delivery-2.1.xsd\n" + - "412588 208 -rwxr-xr-x 1 user group 103419 1 Apr 11:10 /Volumes/spare/ts/config/sml-delivery-norm-2.0.dtd\n" + - "412589 248 -rwxr-xr-x 1 user group 125296 1 Apr 11:10 /Volumes/spare/ts/config/sml-delivery-norm-2.1.dtd\n" + - "412590 72 -rwxr-xr-x 1 user group 36256 1 Apr 11:10 /Volumes/spare/ts/config/jm.xsd\n" + - "412591 8 -rwxr-xr-x 1 user group 990 1 Apr 11:10 /Volumes/spare/ts/config/video.gif\n" + - "412592 8 -rwxr-xr-x 1 user group 1498 1 Apr 11:10 /Volumes/spare/ts/config/xlink.xsd\n" + - "412593 8 -rwxr-xr-x 1 user group 1155 1 Apr 11:10 /Volumes/spare/ts/config/xml.xsd\n" + - "412594 0 drwxr-xr-x 4 user group 136 1 Apr 10:18 /Volumes/spare/ts/acm19\n" + - "412621 0 drwxr-xr-x 57 user group 1938 1 Apr 10:18 /Volumes/spare/ts/acm19/data\n" + - "412622 24 -rwxr-xr-x 1 user group 8894 1 Apr 11:09 /Volumes/spare/ts/acm19/data/00000510.xml\n" + - "412623 32 -rwxr-xr-x 1 user group 14124 1 Apr 11:09 /Volumes/spare/ts/acm19/data/00000603.xml\n" + - "412624 24 -rwxr-xr-x 1 user group 11976 1 Apr 11:09 /Volumes/spare/ts/acm19/data/00001292.xml\n" + - "# tacked on an extra line to cause a file to be deleted.\n" + - "DELETE /Volumes/spare/ts/acm19/data/00001292old.xml\n" + - ""; - -} diff -Nru 
solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestNumberFormatTransformer.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestNumberFormatTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestNumberFormatTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestNumberFormatTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,166 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; - -import java.text.DecimalFormatSymbols; -import java.util.ArrayList; -import java.util.List; -import java.util.Locale; -import java.util.Map; - -/** - *
<p> - * Test for NumberFormatTransformer - * </p>
- * - * @version $Id: TestNumberFormatTransformer.java 765499 2009-04-16 08:01:10Z shalin $ - * @since solr 1.3 - */ -public class TestNumberFormatTransformer { - private char GROUPING_SEP = new DecimalFormatSymbols().getGroupingSeparator(); - private char DECIMAL_SEP = new DecimalFormatSymbols().getDecimalSeparator(); - - @Test - @SuppressWarnings("unchecked") - public void testTransformRow_SingleNumber() { - char GERMAN_GROUPING_SEP = new DecimalFormatSymbols(Locale.GERMANY).getGroupingSeparator(); - List l = new ArrayList(); - l.add(AbstractDataImportHandlerTest.createMap("column", "num", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER)); - l.add(AbstractDataImportHandlerTest.createMap("column", "localizedNum", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER, NumberFormatTransformer.LOCALE, "de-DE")); - Context c = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, l, null); - Map m = AbstractDataImportHandlerTest.createMap("num", "123" + GROUPING_SEP + "567", "localizedNum", "123" + GERMAN_GROUPING_SEP + "567"); - new NumberFormatTransformer().transformRow(m, c); - Assert.assertEquals(new Long(123567), m.get("num")); - Assert.assertEquals(new Long(123567), m.get("localizedNum")); - } - - @Test - @SuppressWarnings("unchecked") - public void testTransformRow_MultipleNumbers() throws Exception { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN, - "inputs")); - fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN, - "outputs", RegexTransformer.SRC_COL_NAME, "inputs", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER)); - - List inputs = new ArrayList(); - inputs.add("123" + GROUPING_SEP + "567"); - inputs.add("245" + GROUPING_SEP + "678"); - Map row = AbstractDataImportHandlerTest.createMap("inputs", inputs); - - VariableResolverImpl resolver = new VariableResolverImpl(); - resolver.addNamespace("e", row); - - Context context = AbstractDataImportHandlerTest.getContext(null, resolver, null, Context.FULL_DUMP, fields, null); - new NumberFormatTransformer().transformRow(row, context); - - List output = new ArrayList(); - output.add(new Long(123567)); - output.add(new Long(245678)); - Map outputRow = AbstractDataImportHandlerTest.createMap("inputs", inputs, - "outputs", output); - - Assert.assertEquals(outputRow, row); - } - - @Test(expected = DataImportHandlerException.class) - @SuppressWarnings("unchecked") - public void testTransformRow_InvalidInput1_Number() { - List l = new ArrayList(); - l.add(AbstractDataImportHandlerTest.createMap("column", "num", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER)); - Context c = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, l, null); - Map m = AbstractDataImportHandlerTest.createMap("num", "123" + GROUPING_SEP + "5a67"); - new NumberFormatTransformer().transformRow(m, c); - } - - @Test(expected = DataImportHandlerException.class) - @SuppressWarnings("unchecked") - public void testTransformRow_InvalidInput2_Number() { - List l = new ArrayList(); - l.add(AbstractDataImportHandlerTest.createMap("column", "num", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER)); - Context c = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, l, null); - Map m = AbstractDataImportHandlerTest.createMap("num", "123" + GROUPING_SEP + "567b"); - new NumberFormatTransformer().transformRow(m, c); - } - - 
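// Editor's sketch, not part of the deleted file: the tests above derive their
// inputs from DecimalFormatSymbols so the assertions hold under any default
// locale. A minimal self-contained illustration of that idiom, assuming only
// the JDK's java.text API (NumberFormatTransformer presumably resolves its
// LOCALE attribute, "de-DE" above, to the same kind of NumberFormat):
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Locale;

public class GroupingSeparatorSketch {
  public static void main(String[] args) throws Exception {
    // Germany groups thousands with '.', so derive the separator from the
    // locale instead of hard-coding one, as the test data above does.
    char sep = new DecimalFormatSymbols(Locale.GERMANY).getGroupingSeparator();
    NumberFormat nf = NumberFormat.getNumberInstance(Locale.GERMANY);
    // "123" + sep + "567" is "123.567"; parsing yields the plain long 123567,
    // mirroring the "localizedNum" assertion in testTransformRow_SingleNumber.
    System.out.println(nf.parse("123" + sep + "567").longValue());
  }
}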
@Test(expected = DataImportHandlerException.class) - @SuppressWarnings("unchecked") - public void testTransformRow_InvalidInput2_Currency() { - List l = new ArrayList(); - l.add(AbstractDataImportHandlerTest.createMap("column", "num", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.CURRENCY)); - Context c = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, l, null); - Map m = AbstractDataImportHandlerTest.createMap("num", "123" + GROUPING_SEP + "567b"); - new NumberFormatTransformer().transformRow(m, c); - } - - @Test(expected = DataImportHandlerException.class) - @SuppressWarnings("unchecked") - public void testTransformRow_InvalidInput1_Percent() { - List l = new ArrayList(); - l.add(AbstractDataImportHandlerTest.createMap("column", "num", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.PERCENT)); - Context c = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, l, null); - Map m = AbstractDataImportHandlerTest.createMap("num", "123" + GROUPING_SEP + "5a67"); - new NumberFormatTransformer().transformRow(m, c); - } - - @Test(expected = DataImportHandlerException.class) - @SuppressWarnings("unchecked") - public void testTransformRow_InvalidInput3_Currency() { - List l = new ArrayList(); - l.add(AbstractDataImportHandlerTest.createMap("column", "num", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.CURRENCY)); - Context c = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, l, null); - Map m = AbstractDataImportHandlerTest.createMap( - "num", "123" + DECIMAL_SEP + "456" + DECIMAL_SEP + "789"); - new NumberFormatTransformer().transformRow(m, c); - } - - @Test(expected = DataImportHandlerException.class) - @SuppressWarnings("unchecked") - public void testTransformRow_InvalidInput3_Number() { - List l = new ArrayList(); - l.add(AbstractDataImportHandlerTest.createMap("column", "num", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER)); - Context c = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, l, null); - Map m = AbstractDataImportHandlerTest.createMap( - "num", "123" + DECIMAL_SEP + "456" + DECIMAL_SEP + "789"); - new NumberFormatTransformer().transformRow(m, c); - } - - @Test - @SuppressWarnings("unchecked") - public void testTransformRow_MalformedInput_Number() { - List l = new ArrayList(); - l.add(AbstractDataImportHandlerTest.createMap("column", "num", - NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER)); - Context c = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, l, null); - Map m = AbstractDataImportHandlerTest.createMap( - "num", "123" + GROUPING_SEP + GROUPING_SEP + "789"); - new NumberFormatTransformer().transformRow(m, c); - Assert.assertEquals(new Long(123789), m.get("num")); - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,70 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import junit.framework.Assert; -import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap; -import org.junit.Test; - -import java.io.StringReader; -import java.util.Properties; - -/** - * Test for PlainTextEntityProcessor - * - * @version $Id: TestPlainTextEntityProcessor.java 738401 2009-01-28 08:30:02Z shalin $ - * @see org.apache.solr.handler.dataimport.PlainTextEntityProcessor - * @since solr 1.4 - */ -public class TestPlainTextEntityProcessor { - @Test - public void simple() { - DataImporter di = new DataImporter(); - di.loadAndInit(DATA_CONFIG); - TestDocBuilder.SolrWriterImpl sw = new TestDocBuilder.SolrWriterImpl(); - DataImporter.RequestParams rp = new DataImporter.RequestParams(createMap("command", "full-import")); - di.runCmd(rp, sw); - Assert.assertEquals(DS.s, sw.docs.get(0).getFieldValue("x")); - - } - - public static class DS extends DataSource { - static String s = "hello world"; - - public void init(Context context, Properties initProps) { - - } - - public Object getData(String query) { - - return new StringReader(s); - } - - public void close() { - - } - } - - static String DATA_CONFIG = "\n" + - "\t\n" + - "\t\n" + - "\t\t\n" + - "\t\t\t\n" + - "\t\t\n" + - "\t\n" + - ""; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,209 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.RegexTransformer.REGEX; -import static org.apache.solr.handler.dataimport.RegexTransformer.GROUP_NAMES; -import static org.apache.solr.handler.dataimport.RegexTransformer.REPLACE_WITH; -import static org.apache.solr.handler.dataimport.DataImporter.COLUMN; -import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap; -import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.getContext; -import org.junit.Assert; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - *
<p> Test for RegexTransformer </p>
- * - * @version $Id: TestRegexTransformer.java 823798 2009-10-10 06:13:55Z noble $ - * @since solr 1.3 - */ -public class TestRegexTransformer { - - @Test - public void commaSeparated() { - List> fields = new ArrayList>(); - // - fields.add(getField("col1", "string", null, "a", ",")); - Context context = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, fields, null); - - Map src = new HashMap(); - src.put("a", "a,bb,cc,d"); - - Map result = new RegexTransformer().transformRow(src, context); - Assert.assertEquals(2, result.size()); - Assert.assertEquals(4, ((List) result.get("col1")).size()); - } - - - @Test - public void groupNames() { - List> fields = new ArrayList>(); - // - Map m = new HashMap(); - m.put(COLUMN,"fullName"); - m.put(GROUP_NAMES,",firstName,lastName"); - m.put(REGEX,"(\\w*) (\\w*) (\\w*)"); - fields.add(m); - Context context = AbstractDataImportHandlerTest.getContext(null, null, null, Context.FULL_DUMP, fields, null); - Map src = new HashMap(); - src.put("fullName", "Mr Noble Paul"); - - Map result = new RegexTransformer().transformRow(src, context); - Assert.assertEquals("Noble", result.get("firstName")); - Assert.assertEquals("Paul", result.get("lastName")); - src= new HashMap(); - List l= new ArrayList(); - l.add("Mr Noble Paul") ; - l.add("Mr Shalin Mangar") ; - src.put("fullName", l); - result = new RegexTransformer().transformRow(src, context); - List l1 = (List) result.get("firstName"); - List l2 = (List) result.get("lastName"); - Assert.assertEquals("Noble", l1.get(0)); - Assert.assertEquals("Shalin", l1.get(1)); - Assert.assertEquals("Paul", l2.get(0)); - Assert.assertEquals("Mangar", l2.get(1)); - } - - @Test - public void replaceWith() { - List> fields = new ArrayList>(); - // - Map fld = getField("name", "string", "'", null, null); - fld.put(REPLACE_WITH, "''"); - fields.add(fld); - Context context = AbstractDataImportHandlerTest.getContext(null, null, - null, Context.FULL_DUMP, fields, null); - - Map src = new HashMap(); - String s = "D'souza"; - src.put("name", s); - - Map result = new RegexTransformer().transformRow(src, - context); - Assert.assertEquals("D''souza", result.get("name")); - } - - @Test - public void mileage() { - // init a whole pile of fields - List> fields = getFields(); - - // add another regex which reuses result from previous regex again! - // - Map fld = getField("hltCityMPG", "string", - ".*(${e.city_mileage})", "rowdata", null); - fld.put(REPLACE_WITH, "*** $1 ***"); - fields.add(fld); - - // **ATTEMPTS** a match WITHOUT a replaceWith - // - fld = getField("t1", "string","duff", "rowdata", null); - fields.add(fld); - - // **ATTEMPTS** a match WITH a replaceWith - // - fld = getField("t2", "string","duff", "rowdata", null); - fld.put(REPLACE_WITH, "60"); - fields.add(fld); - - // regex WITH both replaceWith and groupName (groupName ignored!) 
- // - fld = getField("t3", "string","(Range)", "rowdata", null); - fld.put(REPLACE_WITH, "range"); - fld.put(GROUP_NAMES,"t4,t5"); - fields.add(fld); - - Map row = new HashMap(); - String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City"; - row.put("rowdata", s); - - VariableResolverImpl resolver = new VariableResolverImpl(); - resolver.addNamespace("e", row); - Map eAttrs = AbstractDataImportHandlerTest.createMap("name", "e"); - Context context = AbstractDataImportHandlerTest.getContext(null, resolver, null, Context.FULL_DUMP, fields, eAttrs); - - Map result = new RegexTransformer().transformRow(row, context); - Assert.assertEquals(5, result.size()); - Assert.assertEquals(s, result.get("rowdata")); - Assert.assertEquals("26", result.get("highway_mileage")); - Assert.assertEquals("19", result.get("city_mileage")); - Assert.assertEquals("*** 19 *** mpg City", result.get("hltCityMPG")); - Assert.assertEquals("Fuel Economy range: 26 mpg Hwy, 19 mpg City", result.get("t3")); - } - - @Test - public void testMultiValuedRegex(){ - List> fields = new ArrayList>(); -// - Map fld = getField("participant", null, "(.*)", "person", null); - fields.add(fld); - Context context = getContext(null, null, - null, Context.FULL_DUMP, fields, null); - - ArrayList strings = new ArrayList(); - strings.add("hello"); - strings.add("world"); - Map result = new RegexTransformer().transformRow(createMap("person", strings), context); - Assert.assertEquals(strings,result.get("participant")); - - - } - - public static List> getFields() { - List> fields = new ArrayList>(); - - // - fields.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null)); - - // - fields.add(getField("rowdata", "string", null, "rowdata", null)); - return fields; - } - - public static Map getField(String col, String type, - String re, String srcCol, String splitBy) { - HashMap vals = new HashMap(); - vals.put("column", col); - vals.put("type", type); - vals.put("regex", re); - vals.put("sourceColName", srcCol); - vals.put("splitBy", splitBy); - return vals; - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestScriptTransformer.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestScriptTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestScriptTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestScriptTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,143 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Test; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.InputSource; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - *
<p> - * Test for ScriptTransformer - * </p> - * <p/>
- * All tests in this have been ignored because script support is only available - * in Java 1.6+ - * - * @version $Id: TestScriptTransformer.java 766608 2009-04-20 07:36:55Z shalin $ - * @since solr 1.3 - */ -public class TestScriptTransformer { - - @Test - @Ignore - public void basic() { - String script = "function f1(row,context){" - + "row.put('name','Hello ' + row.get('name'));" + "return row;\n" + "}"; - Context context = getContext("f1", script); - Map map = new HashMap(); - map.put("name", "Scott"); - EntityProcessorWrapper sep = new EntityProcessorWrapper(new SqlEntityProcessor(), null); - sep.init(context); - sep.applyTransformer(map); - Assert.assertEquals(map.get("name"), "Hello Scott"); - - } - - private Context getContext(String funcName, String script) { - List> fields = new ArrayList>(); - Map entity = new HashMap(); - entity.put("name", "hello"); - entity.put("transformer", "script:" + funcName); - - AbstractDataImportHandlerTest.TestContext context = AbstractDataImportHandlerTest.getContext(null, null, null, - Context.FULL_DUMP, fields, entity); - context.script = script; - context.scriptlang = "JavaScript"; - return context; - } - - @Test - @Ignore - public void oneparam() { - - String script = "function f1(row){" - + "row.put('name','Hello ' + row.get('name'));" + "return row;\n" + "}"; - - Context context = getContext("f1", script); - Map map = new HashMap(); - map.put("name", "Scott"); - EntityProcessorWrapper sep = new EntityProcessorWrapper(new SqlEntityProcessor(), null); - sep.init(context); - sep.applyTransformer(map); - Assert.assertEquals(map.get("name"), "Hello Scott"); - - } - - @Test - @Ignore - public void readScriptTag() throws Exception { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); - Document document = builder.parse(new InputSource(new StringReader(xml))); - DataConfig config = new DataConfig(); - config.readFromXml((Element) document.getElementsByTagName("dataConfig") - .item(0)); - Assert.assertTrue(config.script.text.indexOf("checkNextToken") > -1); - } - - @Test - @Ignore - public void checkScript() throws Exception { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); - Document document = builder.parse(new InputSource(new StringReader(xml))); - DataConfig config = new DataConfig(); - config.readFromXml((Element) document.getElementsByTagName("dataConfig") - .item(0)); - - Context c = getContext("checkNextToken", config.script.text); - - Map map = new HashMap(); - map.put("nextToken", "hello"); - EntityProcessorWrapper sep = new EntityProcessorWrapper(new SqlEntityProcessor(), null); - sep.init(c); - sep.applyTransformer(map); - Assert.assertEquals("true", map.get("$hasMore")); - map = new HashMap(); - map.put("nextToken", ""); - sep.applyTransformer(map); - Assert.assertNull(map.get("$hasMore")); - - } - - static String xml = "\n" - + "\t\n" - + "\t\t\n" - + "\n" + "\t\t\t\n" - + "\n" + "\t\t\n" + "\t\n" + ""; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,245 
+0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.text.SimpleDateFormat; -import java.text.ParseException; - -/** - *
<p> - * Test for SqlEntityProcessor which checks full and delta imports using the - * test harness - * </p>
- * - * @version $Id: TestSqlEntityProcessor2.java 822904 2009-10-07 20:50:34Z shalin $ - * @since solr 1.3 - */ -public class TestSqlEntityProcessor2 extends AbstractDataImportHandlerTest { - @Override - public String getSchemaFile() { - return "dataimport-schema.xml"; - } - - @Override - public String getSolrConfigFile() { - return "dataimport-solrconfig.xml"; - } - - @Override - public void setUp() throws Exception { - super.setUp(); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_FullImport() throws Exception { - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator("select * from x", parentRow.iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - - MockDataSource.setIterator("select * from y where y.A=1", childRow - .iterator()); - - super.runFullImport(dataConfig); - - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_FullImportNoCommit() throws Exception { - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "10")); - MockDataSource.setIterator("select * from x", parentRow.iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - - MockDataSource.setIterator("select * from y where y.A=10", childRow - .iterator()); - - - super.runFullImport(dataConfig,createMap("commit","false")); - assertQ(req("id:10"), "//*[@numFound='0']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport() throws Exception { - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "5")); - MockDataSource.setIterator("select id from x where last_modified > NOW", - deltaRow.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "5")); - MockDataSource.setIterator("select * from x where id = '5'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - MockDataSource.setIterator("select * from y where y.A=5", childRow - .iterator()); - - super.runDeltaImport(dataConfig); - - assertQ(req("id:5"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_DeletedPkQuery() throws Exception { - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "11")); - MockDataSource.setIterator("select * from x", parentRow.iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - - MockDataSource.setIterator("select * from y where y.A=11", childRow - .iterator()); - - super.runFullImport(dataConfig); - - assertQ(req("id:11"), "//*[@numFound='1']"); - - - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "15")); - deltaRow.add(createMap("id", "17")); - MockDataSource.setIterator("select id from x where last_modified > NOW", - deltaRow.iterator()); - - List deltaDeleteRow = new ArrayList(); - deltaDeleteRow.add(createMap("id", "11")); - deltaDeleteRow.add(createMap("id", "17")); - MockDataSource.setIterator("select id from x where last_modified > NOW AND deleted='true'", - deltaDeleteRow.iterator()); - - parentRow = new ArrayList(); - parentRow.add(createMap("id", "15")); - MockDataSource.setIterator("select * from x where id = '15'", parentRow - .iterator()); - - 
parentRow = new ArrayList(); - parentRow.add(createMap("id", "17")); - MockDataSource.setIterator("select * from x where id = '17'", parentRow - .iterator()); - - super.runDeltaImport(dataConfig); - - assertQ(req("id:15"), "//*[@numFound='1']"); - assertQ(req("id:11"), "//*[@numFound='0']"); - assertQ(req("id:17"), "//*[@numFound='0']"); - - - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_DeltaImportQuery() throws Exception { - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "5")); - MockDataSource.setIterator("select id from x where last_modified > NOW", - deltaRow.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "5")); - MockDataSource.setIterator("select * from x where id=5", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - MockDataSource.setIterator("select * from y where y.A=5", childRow - .iterator()); - - super.runDeltaImport(dataConfig_deltaimportquery); - - assertQ(req("id:5"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testLastIndexTime() throws Exception { - List row = new ArrayList(); - row.add(createMap("id", 5)); - MockDataSource.setIterator("select * from x where last_modified > OK", row.iterator()); - super.runFullImport(dataConfig_LastIndexTime); - assertQ(req("id:5"), "//*[@numFound='1']"); - } - - static class DateFormatValidatingEvaluator extends Evaluator { - public String evaluate(String expression, Context context) { - List l = EvaluatorBag.parseParams(expression, context.getVariableResolver()); - Object o = l.get(0); - String dateStr = null; - if (o instanceof EvaluatorBag.VariableWrapper) { - EvaluatorBag.VariableWrapper wrapper = (EvaluatorBag.VariableWrapper) o; - o = wrapper.resolve(); - dateStr = o.toString(); - } - SimpleDateFormat formatter = DataImporter.DATE_TIME_FORMAT.get(); - try { - formatter.parse(dateStr); - } catch (ParseException e) { - DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e); - } - return "OK"; - } - } - - private static String dataConfig_LastIndexTime = "\n" + - "\t\n" + - "\t\n" + - "\t\t ${dih.functions.checkDateFormat(dih.last_index_time)}\" />\n" + - "\t\n" + - ""; - - private static String dataConfig = "\n" - + " \n" - + " NOW AND deleted='true'\" deltaQuery=\"select id from x where last_modified > NOW\">\n" - + " \n" - + " \n" - + " \n" - + " \n" + " \n" - + " \n" + "\n"; - - private static String dataConfig_deltaimportquery = "\n" - + " \n" - + " NOW\">\n" - + " \n" - + " \n" - + " \n" - + " \n" + " \n" - + " \n" + "\n"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,291 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Test; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -/** - *
<p> - * Test for SqlEntityProcessor which checks variations in primary key names and deleted ids - * </p>
- * - * - * @version $Id: TestSqlEntityProcessor2.java 723824 2008-12-05 19:14:11Z shalin $ - * @since solr 1.3 - */ -public class TestSqlEntityProcessorDelta2 extends AbstractDataImportHandlerTest { - private static final String FULLIMPORT_QUERY = "select * from x"; - - private static final String DELTA_QUERY = "select id from x where last_modified > NOW"; - - private static final String DELETED_PK_QUERY = "select id from x where last_modified > NOW AND deleted='true'"; - - @Override - public String getSchemaFile() { - return "dataimport-solr_id-schema.xml"; - } - - @Override - public String getSolrConfigFile() { - return "dataimport-solrconfig.xml"; - } - - @Override - public void setUp() throws Exception { - super.setUp(); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - } - - - @SuppressWarnings("unchecked") - private void add1document() throws Exception { - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator(FULLIMPORT_QUERY, parentRow.iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runFullImport(dataConfig_delta2); - - assertQ(req("*:* OR add1document"), "//*[@numFound='1']"); - assertQ(req("solr_id:prefix-1"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_FullImport() throws Exception { - add1document(); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_delete() throws Exception { - add1document(); - List deletedRow = new ArrayList(); - deletedRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELETED_PK_QUERY, deletedRow.iterator()); - - MockDataSource.setIterator(DELTA_QUERY, Collections - .EMPTY_LIST.iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta2); - assertQ(req("*:* OR testCompositePk_DeltaImport_delete"), "//*[@numFound='0']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_empty() throws Exception { - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELTA_QUERY, deltaRow.iterator()); - - MockDataSource.setIterator(DELETED_PK_QUERY, Collections - .EMPTY_LIST.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator("select * from x where id='1'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta2); - - assertQ(req("*:* OR testCompositePk_DeltaImport_empty"), "//*[@numFound='1']"); - assertQ(req("solr_id:prefix-1"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void XtestCompositePk_DeltaImport_replace_delete() throws Exception { - add1document(); - MockDataSource.clearCache(); - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELTA_QUERY, - deltaRow.iterator()); - - List deletedRow = new ArrayList(); - deletedRow.add(createMap("id", "1")); - 
MockDataSource.setIterator(DELETED_PK_QUERY, - deletedRow.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator("select * from x where id='1'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "goodbye")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta2); - - assertQ(req("*:* OR testCompositePk_DeltaImport_replace_delete"), "//*[@numFound='0']"); - } - - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_replace_nodelete() throws Exception { - add1document(); - MockDataSource.clearCache(); - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELTA_QUERY, - deltaRow.iterator()); - - MockDataSource.setIterator(DELETED_PK_QUERY, Collections - .EMPTY_LIST.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator("select * from x where id='1'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "goodbye")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta2); - - assertQ(req("*:* OR XtestCompositePk_DeltaImport_replace_nodelete"), "//*[@numFound='1']"); - assertQ(req("solr_id:prefix-1"), "//*[@numFound='1']"); - assertQ(req("desc:hello OR XtestCompositePk_DeltaImport_replace_nodelete"), "//*[@numFound='0']"); - assertQ(req("desc:goodbye"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_add() throws Exception { - add1document(); - MockDataSource.clearCache(); - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "2")); - MockDataSource.setIterator(DELTA_QUERY, - deltaRow.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "2")); - MockDataSource.setIterator("select * from x where id='2'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "goodbye")); - MockDataSource.setIterator("select * from y where y.A='2'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta2); - - assertQ(req("*:* OR testCompositePk_DeltaImport_add"), "//*[@numFound='2']"); - assertQ(req("solr_id:prefix-1"), "//*[@numFound='1']"); - assertQ(req("solr_id:prefix-2"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - assertQ(req("desc:goodbye"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_nodelta() throws Exception { - add1document(); - MockDataSource.clearCache(); - - MockDataSource.setIterator(DELTA_QUERY, - Collections.EMPTY_LIST.iterator()); - - super.runDeltaImport(dataConfig_delta2); - - assertQ(req("*:* OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); - assertQ(req("solr_id:prefix-1 OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); - assertQ(req("desc:hello OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_add_delete() throws Exception { - add1document(); - MockDataSource.clearCache(); - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "2")); - MockDataSource.setIterator(DELTA_QUERY, - deltaRow.iterator()); - - List deletedRow = new ArrayList(); - 
deletedRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELETED_PK_QUERY, - deletedRow.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "2")); - MockDataSource.setIterator("select * from x where id='2'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "goodbye")); - MockDataSource.setIterator("select * from y where y.A='2'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta2); - - assertQ(req("*:* OR XtestCompositePk_DeltaImport_add_delete"), "//*[@numFound='1']"); - assertQ(req("solr_id:prefix-2"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='0']"); - assertQ(req("desc:goodbye"), "//*[@numFound='1']"); - } - - private static String dataConfig_delta2 = "\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" + " \n" - + " \n" + "\n"; - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,295 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Test; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -/** - *
<p> - * Test for SqlEntityProcessor which checks variations in primary key names and deleted ids - * </p>
- * - * - * @version $Id: TestSqlEntityProcessor2.java 723824 2008-12-05 19:14:11Z shalin $ - * @since solr 1.3 - */ -public class TestSqlEntityProcessorDelta extends AbstractDataImportHandlerTest { - private static final String FULLIMPORT_QUERY = "select * from x"; - - private static final String DELTA_QUERY = "select id from x where last_modified > NOW"; - - private static final String DELETED_PK_QUERY = "select id from x where last_modified > NOW AND deleted='true'"; - - @Override - public String getSchemaFile() { - return "dataimport-schema.xml"; - } - - @Override - public String getSolrConfigFile() { - return "dataimport-solrconfig.xml"; - } - - @Override - public void setUp() throws Exception { - super.setUp(); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - } - - - @SuppressWarnings("unchecked") - private void add1document() throws Exception { - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator(FULLIMPORT_QUERY, parentRow.iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runFullImport(dataConfig_delta); - - assertQ(req("*:* OR add1document"), "//*[@numFound='1']"); - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_FullImport() throws Exception { - add1document(); - } - - // WORKS - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_delete() throws Exception { - add1document(); - List deletedRow = new ArrayList(); - deletedRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELETED_PK_QUERY, deletedRow.iterator()); - - MockDataSource.setIterator(DELTA_QUERY, Collections - .EMPTY_LIST.iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta); - assertQ(req("*:* OR testCompositePk_DeltaImport_delete"), "//*[@numFound='0']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_empty() throws Exception { - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELTA_QUERY, deltaRow.iterator()); - - MockDataSource.setIterator(DELETED_PK_QUERY, Collections - .EMPTY_LIST.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator("select * from x where id='1'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "hello")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta); - - assertQ(req("*:* OR testCompositePk_DeltaImport_empty"), "//*[@numFound='1']"); - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - } - - // WORKS - - @Test - @SuppressWarnings("unchecked") - public void XtestCompositePk_DeltaImport_replace_delete() throws Exception { - add1document(); - MockDataSource.clearCache(); - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELTA_QUERY, - deltaRow.iterator()); - - List deletedRow = new ArrayList(); - deletedRow.add(createMap("id", "1")); - 
MockDataSource.setIterator(DELETED_PK_QUERY, - deletedRow.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator("select * from x where id='1'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "goodbye")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta); - - assertQ(req("*:* OR testCompositePk_DeltaImport_replace_delete"), "//*[@numFound='0']"); - } - - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_replace_nodelete() throws Exception { - add1document(); - MockDataSource.clearCache(); - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "1")); - MockDataSource.setIterator(DELTA_QUERY, - deltaRow.iterator()); - - MockDataSource.setIterator(DELETED_PK_QUERY, Collections - .EMPTY_LIST.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "1")); - MockDataSource.setIterator("select * from x where id='1'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "goodbye")); - MockDataSource.setIterator("select * from y where y.A='1'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta); - - assertQ(req("*:* OR XtestCompositePk_DeltaImport_replace_nodelete"), "//*[@numFound='1']"); - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("desc:hello OR XtestCompositePk_DeltaImport_replace_nodelete"), "//*[@numFound='0']"); - assertQ(req("desc:goodbye"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_add() throws Exception { - add1document(); - MockDataSource.clearCache(); - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "2")); - MockDataSource.setIterator(DELTA_QUERY, - deltaRow.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "2")); - MockDataSource.setIterator("select * from x where id='2'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "goodbye")); - MockDataSource.setIterator("select * from y where y.A='2'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta); - - assertQ(req("*:* OR testCompositePk_DeltaImport_add"), "//*[@numFound='2']"); - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='1']"); - assertQ(req("desc:goodbye"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_nodelta() throws Exception { - add1document(); - MockDataSource.clearCache(); - - MockDataSource.setIterator(DELTA_QUERY, - Collections.EMPTY_LIST.iterator()); - - super.runDeltaImport(dataConfig_delta); - - assertQ(req("*:* OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); - assertQ(req("id:1 OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); - assertQ(req("desc:hello OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); - } - - @Test - @SuppressWarnings("unchecked") - public void testCompositePk_DeltaImport_add_delete() throws Exception { - add1document(); - MockDataSource.clearCache(); - - List deltaRow = new ArrayList(); - deltaRow.add(createMap("id", "2")); - MockDataSource.setIterator(DELTA_QUERY, - deltaRow.iterator()); - - List deletedRow = new ArrayList(); - deletedRow.add(createMap("id", "1")); - 
MockDataSource.setIterator(DELETED_PK_QUERY, - deletedRow.iterator()); - - List parentRow = new ArrayList(); - parentRow.add(createMap("id", "2")); - MockDataSource.setIterator("select * from x where id='2'", parentRow - .iterator()); - - List childRow = new ArrayList(); - childRow.add(createMap("desc", "goodbye")); - MockDataSource.setIterator("select * from y where y.A='2'", childRow - .iterator()); - - super.runDeltaImport(dataConfig_delta); - - assertQ(req("*:* OR XtestCompositePk_DeltaImport_add_delete"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='1']"); - assertQ(req("desc:hello"), "//*[@numFound='0']"); - assertQ(req("desc:goodbye"), "//*[@numFound='1']"); - } - - private static String dataConfig_delta = "\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" + " \n" - + " \n" + "\n"; - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,179 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; - -import java.util.*; - -/** - *
<p> - * Test for SqlEntityProcessor - * </p>
- * - * @version $Id: TestSqlEntityProcessor.java 766608 2009-04-20 07:36:55Z shalin $ - * @since solr 1.3 - */ -public class TestSqlEntityProcessor { - private static ThreadLocal local = new ThreadLocal(); - - @Test - public void singleBatch() { - SqlEntityProcessor sep = new SqlEntityProcessor(); - List> rows = getRows(3); - VariableResolverImpl vr = new VariableResolverImpl(); - HashMap ea = new HashMap(); - ea.put("query", "SELECT * FROM A"); - Context c = AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows), - Context.FULL_DUMP, null, ea); - sep.init(c); - int count = 0; - while (true) { - Map r = sep.nextRow(); - if (r == null) - break; - count++; - } - - Assert.assertEquals(3, count); - } - - @Test - public void tranformer() { - EntityProcessor sep = new EntityProcessorWrapper( new SqlEntityProcessor(), null); - List> rows = getRows(2); - VariableResolverImpl vr = new VariableResolverImpl(); - HashMap ea = new HashMap(); - ea.put("query", "SELECT * FROM A"); - ea.put("transformer", T.class.getName()); - - sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows), - Context.FULL_DUMP, null, ea)); - List> rs = new ArrayList>(); - Map r = null; - while (true) { - r = sep.nextRow(); - if (r == null) - break; - rs.add(r); - - } - Assert.assertEquals(2, rs.size()); - Assert.assertNotNull(rs.get(0).get("T")); - } - - @Test - public void tranformerWithReflection() { - EntityProcessor sep = new EntityProcessorWrapper(new SqlEntityProcessor(), null); - List> rows = getRows(2); - VariableResolverImpl vr = new VariableResolverImpl(); - HashMap ea = new HashMap(); - ea.put("query", "SELECT * FROM A"); - ea.put("transformer", T3.class.getName()); - - sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows), - Context.FULL_DUMP, null, ea)); - List> rs = new ArrayList>(); - Map r = null; - while (true) { - r = sep.nextRow(); - if (r == null) - break; - rs.add(r); - - } - Assert.assertEquals(2, rs.size()); - Assert.assertNotNull(rs.get(0).get("T3")); - } - - @Test - public void tranformerList() { - EntityProcessor sep = new EntityProcessorWrapper(new SqlEntityProcessor(),null); - List> rows = getRows(2); - VariableResolverImpl vr = new VariableResolverImpl(); - - HashMap ea = new HashMap(); - ea.put("query", "SELECT * FROM A"); - ea.put("transformer", T2.class.getName()); - sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows), - Context.FULL_DUMP, null, ea)); - - local.set(0); - Map r = null; - int count = 0; - while (true) { - r = sep.nextRow(); - if (r == null) - break; - count++; - } - Assert.assertEquals(2, local.get()); - Assert.assertEquals(4, count); - } - - private List> getRows(int count) { - List> rows = new ArrayList>(); - for (int i = 0; i < count; i++) { - Map row = new HashMap(); - row.put("id", i); - row.put("value", "The value is " + i); - rows.add(row); - } - return rows; - } - - private static DataSource>> getDs( - final List> rows) { - return new DataSource>>() { - public Iterator> getData(String query) { - return rows.iterator(); - } - - public void init(Context context, Properties initProps) { - } - - public void close() { - } - }; - } - - public static class T extends Transformer { - public Object transformRow(Map aRow, Context context) { - aRow.put("T", "Class T"); - return aRow; - } - } - - public static class T3 { - public Object transformRow(Map aRow) { - aRow.put("T3", "T3 class"); - return aRow; - } - } - - public static class T2 extends Transformer { - public Object transformRow(Map aRow, Context context) { - 
Integer count = local.get(); - local.set(count + 1); - List> l = new ArrayList>(); - l.add(aRow); - l.add(aRow); - return l; - } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateString.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateString.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateString.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateString.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; - -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; -import java.util.regex.Pattern; - -/** - *
<p> - * Test for TemplateString - * </p>
- * - * @version $Id: TestTemplateString.java 681182 2008-07-30 19:35:58Z shalin $ - * @since solr 1.3 - */ -public class TestTemplateString { - @Test - public void testSimple() { - VariableResolverImpl vri = new VariableResolverImpl(); - Map ns = new HashMap(); - ns.put("last_index_time", Long.valueOf(1199429363730l)); - vri.addNamespace("indexer", ns); - Assert - .assertEquals( - "select id from subject where last_modified > 1199429363730", - new TemplateString() - .replaceTokens( - "select id from subject where last_modified > ${indexer.last_index_time}", - vri)); - } - - private static Properties EMPTY_PROPS = new Properties(); - - private static Pattern SELECT_WHERE_PATTERN = Pattern.compile( - "^\\s*(select\\b.*?\\b)(where).*", Pattern.CASE_INSENSITIVE); -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - *
<p>
- * Test for TemplateTransformer
- * </p>
- * - * @version $Id: TestTemplateTransformer.java 765499 2009-04-16 08:01:10Z shalin $ - * @since solr 1.3 - */ -public class TestTemplateTransformer { - - @Test - @SuppressWarnings("unchecked") - public void testTransformRow() { - List fields = new ArrayList(); - fields.add(AbstractDataImportHandlerTest.createMap("column", "firstName")); - fields.add(AbstractDataImportHandlerTest.createMap("column", "lastName")); - fields.add(AbstractDataImportHandlerTest.createMap("column", "middleName")); - fields.add(AbstractDataImportHandlerTest.createMap("column", "name", - TemplateTransformer.TEMPLATE, - "${e.lastName}, ${e.firstName} ${e.middleName}")); - // test reuse of template output in another template - fields.add(AbstractDataImportHandlerTest.createMap("column", "mrname", - TemplateTransformer.TEMPLATE,"Mr ${e.name}")); - - Map row = AbstractDataImportHandlerTest.createMap( - "firstName", "Shalin", - "middleName", "Shekhar", - "lastName", "Mangar"); - - VariableResolverImpl resolver = new VariableResolverImpl(); - resolver.addNamespace("e", row); - Map entityAttrs = AbstractDataImportHandlerTest.createMap( - "name", "e"); - - Context context = AbstractDataImportHandlerTest.getContext(null, resolver, - null, Context.FULL_DUMP, fields, entityAttrs); - new TemplateTransformer().transformRow(row, context); - Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name")); - Assert.assertEquals("Mr Mangar, Shalin Shekhar", row.get("mrname")); - } - -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestURLDataSource.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestURLDataSource.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestURLDataSource.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestURLDataSource.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
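
The two template tests deleted above pin down the DIH templating contract: a
VariableResolverImpl holds named maps of values, and ${namespace.key} tokens in
a template string are replaced with the resolved values. A minimal sketch of
that contract, reusing only the calls the deleted tests themselves make:

    // Sketch only; mirrors the deleted tests' usage of the DIH classes.
    VariableResolverImpl resolver = new VariableResolverImpl();
    Map<String, Object> ns = new HashMap<String, Object>();
    ns.put("last_index_time", Long.valueOf(1199429363730L));
    resolver.addNamespace("indexer", ns);

    String sql = new TemplateString().replaceTokens(
        "select id from subject where last_modified > ${indexer.last_index_time}",
        resolver);
    // sql == "select id from subject where last_modified > 1199429363730"

TemplateTransformer applies the same substitution once per row, and a resolved
column is visible to templates evaluated after it, which is why the test above
can feed the output of the "name" template into the "mrname" one.
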
- */ -package org.apache.solr.handler.dataimport; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import junit.framework.Assert; - -import org.junit.Test; - -public class TestURLDataSource { - private List> fields = new ArrayList>(); - private URLDataSource dataSource = new URLDataSource(); - private VariableResolverImpl variableResolver = new VariableResolverImpl(); - private Context context = AbstractDataImportHandlerTest.getContext(null, variableResolver, - dataSource, Context.FULL_DUMP, fields, null); - private Properties initProps = new Properties(); - - @Test - public void substitutionsOnBaseUrl() throws Exception { - String url = "http://example.com/"; - - variableResolver.addNamespace("dataimporter.request", Collections.singletonMap("baseurl", url)); - - initProps.setProperty(URLDataSource.BASE_URL, "${dataimporter.request.baseurl}"); - dataSource.init(context, initProps); - Assert.assertEquals(url, dataSource.getBaseUrl()); - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,145 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; -import org.apache.solr.util.DateMathParser; - -import java.text.SimpleDateFormat; -import java.util.*; - -/** - *
<p>
- * Test for VariableResolver
- * </p>
- * - * @version $Id: TestVariableResolver.java 822889 2009-10-07 20:08:53Z shalin $ - * @since solr 1.3 - */ -public class TestVariableResolver { - - @Test - public void testSimpleNamespace() { - VariableResolverImpl vri = new VariableResolverImpl(); - Map ns = new HashMap(); - ns.put("world", "WORLD"); - vri.addNamespace("hello", ns); - Assert.assertEquals("WORLD", vri.resolve("hello.world")); - } - - @Test - public void testNestedNamespace() { - VariableResolverImpl vri = new VariableResolverImpl(); - Map ns = new HashMap(); - ns.put("world", "WORLD"); - vri.addNamespace("hello", ns); - ns = new HashMap(); - ns.put("world1", "WORLD1"); - vri.addNamespace("hello.my", ns); - Assert.assertEquals("WORLD1", vri.resolve("hello.my.world1")); - } - - @Test - public void test3LevelNestedNamespace() { - VariableResolverImpl vri = new VariableResolverImpl(); - Map ns = new HashMap(); - ns.put("world", "WORLD"); - vri.addNamespace("hello", ns); - ns = new HashMap(); - ns.put("world1", "WORLD1"); - vri.addNamespace("hello.my.new", ns); - Assert.assertEquals("WORLD1", vri.resolve("hello.my.new.world1")); - } - - @Test - public void dateNamespaceWithValue() { - VariableResolverImpl vri = new VariableResolverImpl(); - vri.context = new ContextImpl(null,vri, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null,null); - vri.addNamespace("dataimporter.functions", EvaluatorBag - .getFunctionsNamespace(Collections.EMPTY_LIST, null)); - Map ns = new HashMap(); - Date d = new Date(); - ns.put("dt", d); - vri.addNamespace("A", ns); - Assert.assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(d), - vri.replaceTokens("${dataimporter.functions.formatDate(A.dt,'yyyy-MM-dd HH:mm:ss')}")); - } - - @Test - public void dateNamespaceWithExpr() throws Exception { - VariableResolverImpl vri = new VariableResolverImpl(); - vri.context = new ContextImpl(null,vri, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null,null); - vri.addNamespace("dataimporter.functions", EvaluatorBag - .getFunctionsNamespace(Collections.EMPTY_LIST,null)); - - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); - format.setTimeZone(TimeZone.getTimeZone("UTC")); - DateMathParser dmp = new DateMathParser(TimeZone.getDefault(), Locale.getDefault()); - - String s = vri.replaceTokens("${dataimporter.functions.formatDate('NOW/DAY','yyyy-MM-dd HH:mm')}"); - Assert.assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm").format(dmp.parseMath("/DAY")), s); - } - - @Test - public void testDefaultNamespace() { - VariableResolverImpl vri = new VariableResolverImpl(); - Map ns = new HashMap(); - ns.put("world", "WORLD"); - vri.addNamespace(null, ns); - Assert.assertEquals("WORLD", vri.resolve("world")); - } - - @Test - public void testDefaultNamespace1() { - VariableResolverImpl vri = new VariableResolverImpl(); - Map ns = new HashMap(); - ns.put("world", "WORLD"); - vri.addNamespace(null, ns); - Assert.assertEquals("WORLD", vri.resolve("world")); - } - - @Test - public void testFunctionNamespace1() throws Exception { - final VariableResolverImpl resolver = new VariableResolverImpl(); - resolver.context = new ContextImpl(null,resolver, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null,null); - final List> l = new ArrayList>(); - Map m = new HashMap(); - m.put("name","test"); - m.put("class",E.class.getName()); - l.add(m); - - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); - format.setTimeZone(TimeZone.getTimeZone("UTC")); - DateMathParser dmp = new DateMathParser(TimeZone.getDefault(), 
Locale.getDefault()); - - resolver.addNamespace("dataimporter.functions", EvaluatorBag - .getFunctionsNamespace(l,null)); - String s = resolver - .replaceTokens("${dataimporter.functions.formatDate('NOW/DAY','yyyy-MM-dd HH:mm')}"); - Assert.assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm") - .format(dmp.parseMath("/DAY")), s); - Assert.assertEquals("Hello World", resolver - .replaceTokens("${dataimporter.functions.test('TEST')}")); - } - - public static class E extends Evaluator{ - public String evaluate(String expression, Context context) { - return "Hello World"; - } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,229 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap; -import org.junit.Assert; -import org.junit.Test; - -import java.io.File; -import java.io.Reader; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -/** - *
<p>
- * Test for XPathEntityProcessor
- * </p>
- * - * @version $Id: TestXPathEntityProcessor.java 823451 2009-10-09 08:23:35Z noble $ - * @since solr 1.3 - */ -public class TestXPathEntityProcessor { - @Test - public void withFieldsAndXpath() throws Exception { - long time = System.currentTimeMillis(); - File tmpdir = new File("." + time); - tmpdir.mkdir(); - tmpdir.deleteOnExit(); - TestFileListEntityProcessor.createFile(tmpdir, "x.xsl", xsl.getBytes(), - false); - Map entityAttrs = createMap("name", "e", "url", "cd.xml", - XPathEntityProcessor.FOR_EACH, "/catalog/cd"); - List fields = new ArrayList(); - fields.add(createMap("column", "title", "xpath", "/catalog/cd/title")); - fields.add(createMap("column", "artist", "xpath", "/catalog/cd/artist")); - fields.add(createMap("column", "year", "xpath", "/catalog/cd/year")); - Context c = AbstractDataImportHandlerTest.getContext(null, - new VariableResolverImpl(), getDataSource(cdData), Context.FULL_DUMP, fields, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - List> result = new ArrayList>(); - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - } - Assert.assertEquals(3, result.size()); - Assert.assertEquals("Empire Burlesque", result.get(0).get("title")); - Assert.assertEquals("Bonnie Tyler", result.get(1).get("artist")); - Assert.assertEquals("1982", result.get(2).get("year")); - } - - @Test - public void testMultiValued() throws Exception { - Map entityAttrs = createMap("name", "e", "url", "testdata.xml", - XPathEntityProcessor.FOR_EACH, "/root"); - List fields = new ArrayList(); - fields.add(createMap("column", "a", "xpath", "/root/a", DataImporter.MULTI_VALUED, "true")); - Context c = AbstractDataImportHandlerTest.getContext(null, - new VariableResolverImpl(), getDataSource(testXml), Context.FULL_DUMP, fields, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - List> result = new ArrayList>(); - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - } - Assert.assertEquals(2, ((List)result.get(0).get("a")).size()); - } - - @Test - public void testMultiValuedFlatten() throws Exception { - Map entityAttrs = createMap("name", "e", "url", "testdata.xml", - XPathEntityProcessor.FOR_EACH, "/root"); - List fields = new ArrayList(); - fields.add(createMap("column", "a", "xpath", "/root/a" ,"flatten","true")); - Context c = AbstractDataImportHandlerTest.getContext(null, - new VariableResolverImpl(), getDataSource(testXmlFlatten), Context.FULL_DUMP, fields, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - Map result = null; - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result = row; - } - Assert.assertEquals("1B2", result.get("a")); - } - - @Test - public void withFieldsAndXpathStream() throws Exception { - Map entityAttrs = createMap("name", "e", "url", "cd.xml", - XPathEntityProcessor.FOR_EACH, "/catalog/cd", "stream", "true", "batchSize","1"); - List fields = new ArrayList(); - fields.add(createMap("column", "title", "xpath", "/catalog/cd/title")); - fields.add(createMap("column", "artist", "xpath", "/catalog/cd/artist")); - fields.add(createMap("column", "year", "xpath", "/catalog/cd/year")); - Context c = AbstractDataImportHandlerTest.getContext(null, - new VariableResolverImpl(), getDataSource(cdData), Context.FULL_DUMP, 
fields, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - List> result = new ArrayList>(); - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - } - Assert.assertEquals(3, result.size()); - Assert.assertEquals("Empire Burlesque", result.get(0).get("title")); - Assert.assertEquals("Bonnie Tyler", result.get(1).get("artist")); - Assert.assertEquals("1982", result.get(2).get("year")); - } - - @Test - public void withDefaultSolrAndXsl() throws Exception { - long time = System.currentTimeMillis(); - File tmpdir = new File("." + time); - tmpdir.mkdir(); - tmpdir.deleteOnExit(); - TestFileListEntityProcessor.createFile(tmpdir, "x.xsl", xsl.getBytes(), - false); - Map entityAttrs = createMap("name", "e", - XPathEntityProcessor.USE_SOLR_ADD_SCHEMA, "true", "xsl", "" - + new File(tmpdir, "x.xsl").getAbsolutePath(), "url", "cd.xml"); - Context c = AbstractDataImportHandlerTest.getContext(null, - new VariableResolverImpl(), getDataSource(cdData), Context.FULL_DUMP, null, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - List> result = new ArrayList>(); - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - } - Assert.assertEquals(3, result.size()); - Assert.assertEquals("Empire Burlesque", result.get(0).get("title")); - Assert.assertEquals("Bonnie Tyler", result.get(1).get("artist")); - Assert.assertEquals("1982", result.get(2).get("year")); - } - - private DataSource getDataSource(final String xml) { - return new DataSource() { - - public void init(Context context, Properties initProps) { - } - - public void close() { - } - - public Reader getData(String query) { - return new StringReader(xml); - } - }; - } - - private static final String xsl = "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + "\n" + ""; - - private static final String cdData = "\n" - + "\n" - + "\n" - + "\t\n" - + "\t\tEmpire Burlesque\n" - + "\t\tBob Dylan\n" - + "\t\tUSA\n" - + "\t\tColumbia\n" - + "\t\t10.90\n" - + "\t\t1985\n" - + "\t\n" - + "\t\n" - + "\t\tHide your heart\n" - + "\t\tBonnie Tyler\n" - + "\t\tUK\n" - + "\t\tCBS Records\n" - + "\t\t9.90\n" - + "\t\t1988\n" - + "\t\n" - + "\t\n" - + "\t\tGreatest Hits\n" - + "\t\tDolly Parton\n" - + "\t\tUSA\n" - + "\t\tRCA\n" - + "\t\t9.90\n" - + "\t\t1982\n" + "\t\n" + "\t"; - - private static final String testXml = "12"; - - private static final String testXmlFlatten = "1B2"; -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,569 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
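
Every XPathEntityProcessor test deleted above drives the processor through the
same loop, which is the class's actual runtime contract: init() it with a
Context carrying the forEach xpath and the column/xpath field mappings, then
call nextRow() until it returns null. A condensed sketch, assuming the deleted
AbstractDataImportHandlerTest helpers and a DataSource over the XML (like the
tests' getDataSource(cdData)):

    // Sketch only; same helper calls as the deleted tests above.
    Map entityAttrs = AbstractDataImportHandlerTest.createMap(
        "name", "e", "url", "cd.xml",
        XPathEntityProcessor.FOR_EACH, "/catalog/cd");   // one row per <cd>
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap(
        "column", "title", "xpath", "/catalog/cd/title"));

    Context c = AbstractDataImportHandlerTest.getContext(null,
        new VariableResolverImpl(), dataSource, Context.FULL_DUMP,
        fields, entityAttrs);
    XPathEntityProcessor p = new XPathEntityProcessor();
    p.init(c);
    Map<String, Object> row;
    while ((row = p.nextRow()) != null) {
      // one map per node matched by the forEach xpath
    }

Setting "stream" to "true" (with an optional "batchSize") only changes how
eagerly rows are materialized; the withFieldsAndXpathStream test above runs
the identical loop.
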
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Assert; -import org.junit.Test; - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - *
<p> Test for XPathRecordReader </p>
- * - * @version $Id: TestXPathRecordReader.java 822161 2009-10-06 08:11:56Z noble $ - * @since solr 1.3 - */ -public class TestXPathRecordReader { - @Test - public void basic() { - String xml="\n" - + " Hello C1\n" - + " Hello C1\n" - + " \n" - + " Hello C2\n" - + " \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/b"); - rr.addField("c", "/root/b/c", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(2, l.size()); - Assert.assertEquals(2, ((List) l.get(0).get("c")).size()); - Assert.assertEquals(1, ((List) l.get(1).get("c")).size()); - } - - @Test - public void attributes() { - String xml="\n" - + " \n" - + " \n" - + " \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/b"); - rr.addField("a", "/root/b/@a", false); - rr.addField("b", "/root/b/@b", false); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(3, l.size()); - Assert.assertEquals("x0", l.get(0).get("a")); - Assert.assertEquals("x1", l.get(1).get("a")); - Assert.assertEquals("x2", l.get(2).get("a")); - Assert.assertEquals("y0", l.get(0).get("b")); - Assert.assertEquals("y1", l.get(1).get("b")); - Assert.assertEquals("y2", l.get(2).get("b")); - } - - @Test - public void attrInRoot(){ - String xml="\n" + - "\n" + - " \n" + - " \n" + - " 301.46\n" + - " \n" + - "\n" + - " \n" + - " \n" + - " 302.46\n" + - " \n" + - "\n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/r/merchantProduct"); - rr.addField("id", "/r/merchantProduct/@id", false); - rr.addField("mid", "/r/merchantProduct/@mid", false); - rr.addField("price", "/r/merchantProduct/price", false); - rr.addField("conditionType", "/r/merchantProduct/condition/@type", false); - List> l = rr.getAllRecords(new StringReader(xml)); - Map m = l.get(0); - Assert.assertEquals("814636051", m.get("id")); - Assert.assertEquals("189973", m.get("mid")); - Assert.assertEquals("301.46", m.get("price")); - Assert.assertEquals("cond-0", m.get("conditionType")); - - m = l.get(1); - Assert.assertEquals("814636052", m.get("id")); - Assert.assertEquals("189974", m.get("mid")); - Assert.assertEquals("302.46", m.get("price")); - Assert.assertEquals("cond-1", m.get("conditionType")); - } - - @Test - public void attributes2Level() { - String xml="\n" - + "\n \n" - + " \n" - + " \n" - + " " - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a/b"); - rr.addField("a", "/root/a/b/@a", false); - rr.addField("b", "/root/a/b/@b", false); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(3, l.size()); - Assert.assertEquals("x0", l.get(0).get("a")); - Assert.assertEquals("y1", l.get(1).get("b")); - } - - @Test - public void attributes2LevelHetero() { - String xml="\n" - + "\n \n" - + " \n" - + " \n" - + " " - + "\n \n" - + " \n" - + " \n" - + " " - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a | /root/x"); - rr.addField("a", "/root/a/b/@a", false); - rr.addField("b", "/root/a/b/@b", false); - rr.addField("a", "/root/x/b/@a", false); - rr.addField("b", "/root/x/b/@b", false); - - final List> a = new ArrayList>(); - final List> x = new ArrayList>(); - rr.streamRecords(new StringReader(xml), new XPathRecordReader.Handler() { - public void handle(Map record, String xpath) { - if (record == null) return; - if (xpath.equals("/root/a")) a.add(record); - if (xpath.equals("/root/x")) x.add(record); - } - }); - - Assert.assertEquals(1, a.size()); - Assert.assertEquals(1, x.size()); - } - - @Test - public void attributes2LevelMissingAttrVal() { - String xml="\n" - + "\n \n" - + 
" \n" - + " " - + "\n \n" - + " \n" - + " " - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("a", "/root/a/b/@a", true); - rr.addField("b", "/root/a/b/@b", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(2, l.size()); - Assert.assertNull(((List) l.get(1).get("a")).get(1)); - Assert.assertNull(((List) l.get(1).get("b")).get(0)); - } - - @Test - public void elems2LevelMissing() { - String xml="\n" - + "\t\n" - + "\t \n\t x0\n" - + "\t y0\n" - + "\t \n" - + "\t \n\t x1\n" - + "\t y1\n" - + "\t \n" - + "\t \n" - + "\t\n" - + "\t \n\t x3\n\t \n" - + "\t \n\t y4\n\t \n" - + "\t \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("a", "/root/a/b/x", true); - rr.addField("b", "/root/a/b/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(2, l.size()); - Assert.assertNull(((List) l.get(1).get("a")).get(1)); - Assert.assertNull(((List) l.get(1).get("b")).get(0)); - } - - @Test - public void mixedContent() { - String xml = "This text is \n" + - " bold and this text is \n" + - " underlined!\n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/p"); - rr.addField("p", "/p", true); - rr.addField("b", "/p/b", true); - rr.addField("u", "/p/u", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Map row = l.get(0); - - Assert.assertEquals("bold", ((List) row.get("b")).get(0)); - Assert.assertEquals("underlined", ((List) row.get("u")).get(0)); - String p = (String) ((List) row.get("p")).get(0); - Assert.assertTrue(p.contains("This text is")); - Assert.assertTrue(p.contains("and this text is")); - Assert.assertTrue(p.contains("!")); - // Should not contain content from child elements - Assert.assertFalse(p.contains("bold")); - } - - @Test - public void mixedContentFlattened() { - String xml = "This text is \n" + - " bold and this text is \n" + - " underlined!\n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/p"); - rr.addField("p", "/p", false, XPathRecordReader.FLATTEN); - List> l = rr.getAllRecords(new StringReader(xml)); - Map row = l.get(0); - Assert.assertEquals("This text is \n" + - " bold and this text is \n" + - " underlined!", ((String)row.get("p")).trim() ); - } - - @Test - public void elems2LevelWithAttrib() { - String xml = "\n\t\n\t \n" - + "\t x0\n" - + "\t y0\n" - + "\t \n" - + "\t \n" - + "\t x1\n" - + "\t y1\n" - + "\t \n" - + "\t \n" - + "\t \n\t \n" - + "\t x3\n" - + "\t \n" - + "\t \n" - + "\t y4\n" - + "\t \n" - + "\t \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("x", "/root/a/b[@k]/x", true); - rr.addField("y", "/root/a/b[@k]/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(2, l.size()); - Assert.assertEquals(2, ((List) l.get(0).get("x")).size()); - Assert.assertEquals(2, ((List) l.get(0).get("y")).size()); - Assert.assertEquals(0, l.get(1).size()); - } - - @Test - public void elems2LevelWithAttribMultiple() { - String xml="\n" - + "\t\n\t \n" - + "\t x0\n" - + "\t y0\n" - + "\t \n" - + "\t \n" - + "\t x1\n" - + "\t y1\n" - + "\t \n" - + "\t \n" - + "\t\n\t \n" - + "\t x3\n" - + "\t \n" - + "\t \n" - + "\t y4\n" - + "\t \n" - + "\t \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("x", "/root/a/b[@k][@m='n']/x", true); - rr.addField("y", "/root/a/b[@k][@m='n']/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(2, l.size()); - Assert.assertEquals(1, ((List) l.get(0).get("x")).size()); - 
Assert.assertEquals(1, ((List) l.get(0).get("y")).size()); - Assert.assertEquals(0, l.get(1).size()); - } - - @Test - public void elems2LevelWithAttribVal() { - String xml="\n\t\n \n" - + "\t x0\n" - + "\t y0\n" - + "\t \n" - + "\t \n" - + "\t x1\n" - + "\t y1\n" - + "\t \n" - + "\t \n" - + "\t \n x3\n" - + "\t y4\n" - + "\t\n" + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("x", "/root/a/b[@k='x']/x", true); - rr.addField("y", "/root/a/b[@k='x']/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(2, l.size()); - Assert.assertEquals(1, ((List) l.get(0).get("x")).size()); - Assert.assertEquals(1, ((List) l.get(0).get("y")).size()); - Assert.assertEquals(0, l.get(1).size()); - } - - @Test - public void attribValWithSlash() { - String xml = "\n" + - " \n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/root/b"); - rr.addField("x", "/root/b/a[@x='a/b']/@h", false); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(1, l.size()); - Map m = l.get(0); - Assert.assertEquals("hello-A", m.get("x")); - } - - @Test - public void unsupported_Xpaths() { - String xml = " "; - XPathRecordReader rr=null; - try { - rr = new XPathRecordReader("//b"); - Assert.fail("A RuntimeException was expected: //b forEach cannot begin with '//'."); - } - catch (RuntimeException ex) { } - try { - rr.addField("bold" ,"b", false); - Assert.fail("A RuntimeException was expected: 'b' xpaths must begin with '/'."); - } - catch (RuntimeException ex) { } - - } - - @Test - public void any_decendent_from_root() { - XPathRecordReader rr = new XPathRecordReader("/anyd/contenido"); - rr.addField("descdend", "//boo", true); - rr.addField("inr_descd","//boo/i", false); - rr.addField("cont", "/anyd/contenido", false); - rr.addField("id", "/anyd/contenido/@id", false); - rr.addField("status", "/anyd/status", false); - rr.addField("title", "/anyd/contenido/titulo", false,XPathRecordReader.FLATTEN); - rr.addField("resume", "/anyd/contenido/resumen",false); - rr.addField("text", "/anyd/contenido/texto", false); - - String xml="\n" - + " this top level is ignored because it is external to the forEach\n" - + " as is this element\n" - + " \n" - + " This one is not ignored as its inside a forEach\n" - + " big antler\n" - + " My flattened title \n" - + " My summary skip this! \n" - + " Within the body ofMy text\n" - + "

Access inner sub clauses as well

\n" - + "
\n" - + "
"; - - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(1, l.size()); - Map m = l.get(0); - Assert.assertEquals("This one is inside a forEach", m.get("cont").toString().trim()); - Assert.assertEquals("10097" ,m.get("id")); - Assert.assertEquals("My flattened title",m.get("title").toString().trim()); - Assert.assertEquals("My summary" ,m.get("resume").toString().trim()); - Assert.assertEquals("My text" ,m.get("text").toString().trim()); - Assert.assertEquals("not ignored as its",(String) ((List) m.get("descdend")).get(0) ); - Assert.assertEquals("antler" ,(String) ((List) m.get("descdend")).get(1) ); - Assert.assertEquals("Within the body of",(String) ((List) m.get("descdend")).get(2) ); - Assert.assertEquals("inner as well" ,(String) ((List) m.get("descdend")).get(3) ); - Assert.assertEquals("sub clauses" ,m.get("inr_descd").toString().trim()); - } - - @Test - public void any_decendent_of_a_child1() { - XPathRecordReader rr = new XPathRecordReader("/anycd"); - rr.addField("descdend", "/anycd//boo", true); - - // same test string as above but checking to see if *all* //boo's are collected - String xml="\n" - + " this top level is ignored because it is external to the forEach\n" - + " as is this element\n" - + " \n" - + " This one is not ignored as its inside a forEach\n" - + " big antler\n" - + " My flattened title \n" - + " My summary skip this! \n" - + " Within the body ofMy text\n" - + "

Access inner sub clauses as well

\n" - + "
\n" - + "
"; - - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(1, l.size()); - Map m = l.get(0); - Assert.assertEquals("top level" ,(String) ((List) m.get("descdend")).get(0) ); - Assert.assertEquals("this element" ,(String) ((List) m.get("descdend")).get(1) ); - Assert.assertEquals("not ignored as its",(String) ((List) m.get("descdend")).get(2) ); - Assert.assertEquals("antler" ,(String) ((List) m.get("descdend")).get(3) ); - Assert.assertEquals("title" ,(String) ((List) m.get("descdend")).get(4) ); - Assert.assertEquals("Within the body of",(String) ((List) m.get("descdend")).get(5) ); - Assert.assertEquals("inner as well" ,(String) ((List) m.get("descdend")).get(6) ); - } - - @Test - public void any_decendent_of_a_child2() { - XPathRecordReader rr = new XPathRecordReader("/anycd"); - rr.addField("descdend", "/anycd/contenido//boo", true); - - // same test string as above but checking to see if *some* //boo's are collected - String xml="\n" - + " this top level is ignored because it is external to the forEach\n" - + " as is this element\n" - + " \n" - + " This one is not ignored as its inside a forEach\n" - + " big antler\n" - + " My flattened title \n" - + " My summary skip this! \n" - + " Within the body ofMy text\n" - + "

Access inner sub clauses as well

\n" - + "
\n" - + "
"; - - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(1, l.size()); - Map m = l.get(0); - Assert.assertEquals("not ignored as its",((List) m.get("descdend")).get(0) ); - Assert.assertEquals("antler" ,((List) m.get("descdend")).get(1) ); - Assert.assertEquals("title" ,((List) m.get("descdend")).get(2) ); - Assert.assertEquals("Within the body of",((List) m.get("descdend")).get(3) ); - Assert.assertEquals("inner as well" ,((List) m.get("descdend")).get(4) ); - } - - @Test - public void another() { - String xml="\n" - + " \n" - + " \n" - + " This is my title \n" - + " This is my summary \n" - + " This is the body of my text \n" - + " \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/contenido"); - rr.addField("id", "/root/contenido/@id", false); - rr.addField("title", "/root/contenido/titulo", false); - rr.addField("resume","/root/contenido/resumen",false); - rr.addField("text", "/root/contenido/texto", false); - - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals(1, l.size()); - Map m = l.get(0); - Assert.assertEquals("10097", m.get("id")); - Assert.assertEquals("This is my title", m.get("title").toString().trim()); - Assert.assertEquals("This is my summary", m.get("resume").toString().trim()); - Assert.assertEquals("This is the body of my text", m.get("text").toString() - .trim()); - } - - @Test - public void sameForEachAndXpath(){ - String xml="\n" + - " \n" + - " hello\n" + - " \n" + - " \n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/root/cat/name"); - rr.addField("catName", "/root/cat/name",false); - List> l = rr.getAllRecords(new StringReader(xml)); - Assert.assertEquals("hello",l.get(0).get("catName")); - } - - @Test - public void putNullTest(){ - String xml = "\n" + - " \n" + - " \n" + - "
A.1.1\n" + - " B.1.1\n" + - " \n" + - " \n" + - " B.1.2\n" + - " C.1.2\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " A.2.1\n" + - " C.2.1\n" + - " \n" + - " \n" + - " B.2.2\n" + - " C.2.2\n" + - " \n" + - " \n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/root/i"); - rr.addField("a", "/root/i/x/a", true); - rr.addField("b", "/root/i/x/b", true); - rr.addField("c", "/root/i/x/c", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Map map = l.get(0); - List a = (List) map.get("a"); - List b = (List) map.get("b"); - List c = (List) map.get("c"); - - Assert.assertEquals("A.1.1",a.get(0)); - Assert.assertEquals("B.1.1",b.get(0)); - Assert.assertNull(c.get(0)); - - Assert.assertNull(a.get(1)); - Assert.assertEquals("B.1.2",b.get(1)); - Assert.assertEquals("C.1.2",c.get(1)); - - map = l.get(1); - a = (List) map.get("a"); - b = (List) map.get("b"); - c = (List) map.get("c"); - Assert.assertEquals("A.2.1",a.get(0)); - Assert.assertNull(b.get(0)); - Assert.assertEquals("C.2.1",c.get(0)); - - Assert.assertNull(a.get(1)); - Assert.assertEquals("B.2.2",b.get(1)); - Assert.assertEquals("C.2.2",c.get(1)); - } - - - @Test - public void testError(){ - String malformedXml = "\n" + - " \n" + - " 1\n" + - " test1\n" + - " \n" + - " \n" + - " 2\n" + - " test2\n" + - " \n" + - " \n" + - " 3\n" + // invalid XML - " test3\n" + - " \n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/root/node"); - rr.addField("id", "/root/node/id", true); - rr.addField("desc", "/root/node/desc", true); - try { - rr.getAllRecords(new StringReader(malformedXml)); - Assert.fail("A RuntimeException was expected: the input XML is invalid."); - } catch (Exception e) { } - } -} diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/contentstream-solrconfig.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/contentstream-solrconfig.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/contentstream-solrconfig.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/contentstream-solrconfig.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,408 +0,0 @@ - - - - - - ${solr.abortOnConfigurationError:true} - - - ${solr.data.dir:./solr/data} - - - - - false - - 10 - - - - 32 - 2147483647 - 10000 - 1000 - 10000 - - - - - - - - - - - single - - - - - false - 32 - 10 - - - 2147483647 - 10000 - - - false - - - - - - - - - 100000 - - - - - - - 1024 - - - - - - - - - - - - - true - - - - - - - - 50 - - - 200 - - - - - - - - - solr 0 10 - rocks 0 10 - static newSearcher warming query from solrconfig.xml - - - - - - - - - - - false - - - 4 - - - - - - - - - - - - - - - - - - - - - - - explicit - - - - - - - data-config.xml - - - - - - - - - explicit - - - - - - - - - - - - *:* - - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataconfig-contentstream.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataconfig-contentstream.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataconfig-contentstream.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataconfig-contentstream.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ - - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/data-config-with-datasource.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/data-config-with-datasource.xml --- 
solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/data-config-with-datasource.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/data-config-with-datasource.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ - - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/data-config-with-transformer.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/data-config-with-transformer.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/data-config-with-transformer.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/data-config-with-transformer.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ - - - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-nodatasource-solrconfig.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-nodatasource-solrconfig.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-nodatasource-solrconfig.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-nodatasource-solrconfig.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,404 +0,0 @@ - - - - - - ${solr.abortOnConfigurationError:true} - - - ${solr.data.dir:./solr/data} - - - - - false - - 10 - - - - 32 - 2147483647 - 10000 - 1000 - 10000 - - - - - - - - - - - single - - - - - false - 32 - 10 - - - 2147483647 - 10000 - - - false - - - - - - - - - 100000 - - - - - - - 1024 - - - - - - - - - - - - - true - - - - - - - - 50 - - - 200 - - - - - - - - - solr 0 10 - rocks 0 10 - static newSearcher warming query from solrconfig.xml - - - - - - - - - - - false - - - 4 - - - - - - - - - - - - - - - - - - - - - - - explicit - - - - - - - - - - - - explicit - - - - - - - - - - - - *:* - - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-schema.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-schema.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-schema.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-schema.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,304 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - desc - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-solrconfig.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-solrconfig.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-solrconfig.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-solrconfig.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,409 +0,0 @@ - - - - - - ${solr.abortOnConfigurationError:true} - - - ${solr.data.dir:./solr/data} - - - - - false - - 10 - - - - 32 - 2147483647 - 10000 - 1000 - 10000 - - - - - - - - - - - single - - - - - false - 32 - 10 - - - 2147483647 - 10000 - - - false - - - - - - - - - 100000 - - - - - - - 1024 - - - - - - - - - - - - - true - - - - 
- - - - 50 - - - 200 - - - - - - - - - solr 0 10 - rocks 0 10 - static newSearcher warming query from solrconfig.xml - - - - - - - - - - - false - - - 4 - - - - - - - - - - - - - - - - - - - - - - - explicit - - - - - - - - MockDataSource - - - - - - - - - explicit - - - - - - - - - - - - *:* - - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-solr_id-schema.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-solr_id-schema.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-solr_id-schema.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-solr_id-schema.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,304 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - solr_id - - - desc - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/protwords.txt solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/protwords.txt --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/protwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/protwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#use a protected word file to avoid stemming two -#unrelated words to the same base word. -#to test, we will use words that would normally obviously be stemmed. -cats -ridding diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/single-entity-data-config.xml solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/single-entity-data-config.xml --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/single-entity-data-config.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/single-entity-data-config.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/stopwords.txt solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/stopwords.txt --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/stopwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/stopwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -stopworda -stopwordb diff -Nru solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/synonyms.txt solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/synonyms.txt --- solr-1.4.0+ds1/contrib/dataimporthandler/src/test/resources/solr/conf/synonyms.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/dataimporthandler/src/test/resources/solr/conf/synonyms.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -a => aa -b => b1 b2 -c => c1,c2 -a\=>a => b\=>b -a\,a => b\,b -foo,bar,baz - -Television,TV,Televisions diff -Nru solr-1.4.0+ds1/contrib/extraction/build.xml solr-3.3/contrib/extraction/build.xml --- solr-1.4.0+ds1/contrib/extraction/build.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/build.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,139 +0,0 @@ - - - - - - - - - - - - - - Solr Integration with Tika for extracting content from binary file formats such as Microsoft Word and Adobe PDF. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Tests failed! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/extraction/CHANGES.txt solr-3.3/contrib/extraction/CHANGES.txt --- solr-1.4.0+ds1/contrib/extraction/CHANGES.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/CHANGES.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -Apache Solr Content Extraction Library (Solr Cell) - Release Notes - -This file describes changes to the Solr Cell (contrib/extraction) module. See SOLR-284 for details. - -Introduction ------------- - -Apache Solr Extraction provides a means for extracting and indexing content contained in "rich" documents, such -as Microsoft Word, Adobe PDF, etc. (Each name is a trademark of their respective owners) This contrib module -uses Apache Tika to extract content and metadata from the files, which can then be indexed. 
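
The getting-started notes above reduce to: put the extraction jar and the Tika
dependencies on Solr's lib path, then post rich documents to the
ExtractingRequestHandler. A hedged sketch of such a post using the SolrJ
client of the same era; the client class and method names here are an
assumption for illustration, not something this diff ships:

    import java.io.File;
    import org.apache.solr.client.solrj.SolrServer;
    import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
    import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

    public class ExtractPost {
      public static void main(String[] args) throws Exception {
        // Assumed SolrJ 1.4-era API; verify names against the client you use.
        SolrServer server = new CommonsHttpSolrServer("http://localhost:8983/solr");
        ContentStreamUpdateRequest req =
            new ContentStreamUpdateRequest("/update/extract");
        req.addFile(new File("tutorial.pdf"));  // Tika auto-detects the MIME type
        req.setParam("literal.id", "doc1");     // unique key for the new document
        req.setParam("commit", "true");         // commit so the doc is searchable
        server.request(req);
      }
    }
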
For more information, -see http://wiki.apache.org/solr/ExtractingRequestHandler - -Getting Started ---------------- -You will need Solr up and running. Then, simply add the extraction JAR file, plus the Tika dependencies (in the ./lib folder) -to your Solr Home lib directory. See http://wiki.apache.org/solr/ExtractingRequestHandler for more details on hooking it in - and configuring. - - -$Id:$ -================== Release 1.4.0 ================== - -1. SOLR-284: Added in support for extraction. (Eric Pugh, Chris Harris, gsingers) - -2. SOLR-284: Removed "silent success" key generation (gsingers) - -3. SOLR-1075: Upgrade to Tika 0.3. See http://www.apache.org/dist/lucene/tika/CHANGES-0.3.txt (gsingers) - -4. SOLR-1128: Added metadata output to "extract only" option. (gsingers) - -5. SOLR-1310: Upgrade to Tika 0.4. Note there are some differences in detecting Languages now. - See http://www.lucidimagination.com/search/document/d6f1899a85b2a45c/vote_apache_tika_0_4_release_candidate_2#d6f1899a85b2a45c - for discussion on language detection. - See http://www.apache.org/dist/lucene/tika/CHANGES-0.4.txt. (gsingers) - -6. SOLR-1274: Added text serialization output for extractOnly (Peter Wolanin, gsingers) \ No newline at end of file diff -Nru solr-1.4.0+ds1/contrib/extraction/solr-cell-pom.xml.template solr-3.3/contrib/extraction/solr-cell-pom.xml.template --- solr-1.4.0+ds1/contrib/extraction/solr-cell-pom.xml.template 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/solr-cell-pom.xml.template 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ - - - - - 4.0.0 - - - org.apache.solr - solr-parent - @maven_version@ - - - org.apache.solr - solr-cell - Apache Solr Content Extraction Library - @maven_version@ - Apache Solr Content Extraction Library integrates Apache Tika content extraction framework into Solr - jar - - - - org.apache.tika - tika-core - 0.4 - - - org.apache.tika - tika-parsers - 0.4 - - - diff -Nru solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java --- solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,221 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.extraction; - -import org.apache.commons.io.IOUtils; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.params.UpdateParams; -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.request.SolrQueryResponse; -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.update.AddUpdateCommand; -import org.apache.solr.update.processor.UpdateRequestProcessor; -import org.apache.solr.handler.ContentStreamLoader; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.XHTMLContentHandler; -import org.apache.tika.sax.xpath.Matcher; -import org.apache.tika.sax.xpath.MatchingContentHandler; -import org.apache.tika.sax.xpath.XPathParser; -import org.apache.tika.exception.TikaException; -import org.apache.xml.serialize.OutputFormat; -import org.apache.xml.serialize.BaseMarkupSerializer; -import org.apache.xml.serialize.XMLSerializer; -import org.apache.xml.serialize.TextSerializer; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; - -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; - - -/** - * The class responsible for loading extracted content into Solr. - * - **/ -public class ExtractingDocumentLoader extends ContentStreamLoader { - /** - * Extract Only supported format - */ - public static final String TEXT_FORMAT = "text"; - /** - * Extract Only supported format. Default - */ - public static final String XML_FORMAT = "xml"; - /** - * XHTML XPath parser. - */ - private static final XPathParser PARSER = - new XPathParser("xhtml", XHTMLContentHandler.XHTML); - - final IndexSchema schema; - final SolrParams params; - final UpdateRequestProcessor processor; - protected AutoDetectParser autoDetectParser; - - private final AddUpdateCommand templateAdd; - - protected TikaConfig config; - protected SolrContentHandlerFactory factory; - //protected Collection dateFormats = DateUtil.DEFAULT_DATE_FORMATS; - - public ExtractingDocumentLoader(SolrQueryRequest req, UpdateRequestProcessor processor, - TikaConfig config, SolrContentHandlerFactory factory) { - this.params = req.getParams(); - schema = req.getSchema(); - this.config = config; - this.processor = processor; - - templateAdd = new AddUpdateCommand(); - templateAdd.allowDups = false; - templateAdd.overwriteCommitted = true; - templateAdd.overwritePending = true; - - if (params.getBool(UpdateParams.OVERWRITE, true)) { - templateAdd.allowDups = false; - templateAdd.overwriteCommitted = true; - templateAdd.overwritePending = true; - } else { - templateAdd.allowDups = true; - templateAdd.overwriteCommitted = false; - templateAdd.overwritePending = false; - } - //this is lightweight - autoDetectParser = new AutoDetectParser(config); - this.factory = factory; - } - - - /** - * this must be MT safe... may be called concurrently from multiple threads. 
- * - * @param - * @param - */ - void doAdd(SolrContentHandler handler, AddUpdateCommand template) - throws IOException { - template.solrDoc = handler.newDocument(); - processor.processAdd(template); - } - - void addDoc(SolrContentHandler handler) throws IOException { - templateAdd.indexedId = null; - doAdd(handler, templateAdd); - } - - /** - * @param req - * @param stream - * @throws java.io.IOException - */ - public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws IOException { - errHeader = "ExtractingDocumentLoader: " + stream.getSourceInfo(); - Parser parser = null; - String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null); - if (streamType != null) { - //Cache? Parsers are lightweight to construct and thread-safe, so I'm told - parser = config.getParser(streamType.trim().toLowerCase()); - } else { - parser = autoDetectParser; - } - if (parser != null) { - Metadata metadata = new Metadata(); - metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName()); - metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo()); - metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize())); - metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType()); - - // If you specify the resource name (the filename, roughly) with this parameter, - // then Tika can make use of it in guessing the appropriate MIME type: - String resourceName = req.getParams().get(ExtractingParams.RESOURCE_NAME, null); - if (resourceName != null) { - metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName); - } - - SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema); - InputStream inputStream = null; - try { - inputStream = stream.getStream(); - String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION); - boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false); - ContentHandler parsingHandler = handler; - - StringWriter writer = null; - BaseMarkupSerializer serializer = null; - if (extractOnly == true) { - String extractFormat = params.get(ExtractingParams.EXTRACT_FORMAT, "xml"); - writer = new StringWriter(); - if (extractFormat.equals(TEXT_FORMAT)) { - serializer = new TextSerializer(); - serializer.setOutputCharStream(writer); - serializer.setOutputFormat(new OutputFormat("Text", "UTF-8", true)); - } else { - serializer = new XMLSerializer(writer, new OutputFormat("XML", "UTF-8", true)); - } - if (xpathExpr != null) { - Matcher matcher = - PARSER.parse(xpathExpr); - serializer.startDocument();//The MatchingContentHandler does not invoke startDocument. See http://tika.markmail.org/message/kknu3hw7argwiqin - parsingHandler = new MatchingContentHandler(serializer, matcher); - } else { - parsingHandler = serializer; - } - } else if (xpathExpr != null) { - Matcher matcher = - PARSER.parse(xpathExpr); - parsingHandler = new MatchingContentHandler(handler, matcher); - } //else leave it as is - - //potentially use a wrapper handler for parsing, but we still need the SolrContentHandler for getting the document. 
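
// Editor's note, not original diff content: by this point parsingHandler is
// one of four chains, depending on extractOnly and the xpath parameter:
//   handler                                      normal indexing
//   new MatchingContentHandler(handler, m)       indexing restricted by xpath
//   serializer (TextSerializer or XMLSerializer) extractOnly, whole document
//   new MatchingContentHandler(serializer, m)    extractOnly plus xpath
// Tika pushes SAX events into that chain; only the two indexing paths reach
// addDoc(handler) after the parse below.
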
- parser.parse(inputStream, parsingHandler, metadata); - if (extractOnly == false) { - addDoc(handler); - } else { - //serializer is not null, so we need to call endDoc on it if using xpath - if (xpathExpr != null){ - serializer.endDocument(); - } - rsp.add(stream.getName(), writer.toString()); - writer.close(); - String[] names = metadata.names(); - NamedList metadataNL = new NamedList(); - for (int i = 0; i < names.length; i++) { - String[] vals = metadata.getValues(names[i]); - metadataNL.add(names[i], vals); - } - rsp.add(stream.getName() + "_metadata", metadataNL); - } - } catch (SAXException e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); - } catch (TikaException e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); - } finally { - IOUtils.closeQuietly(inputStream); - } - } else { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stream type of " + streamType + " didn't match any known parsers. Please supply the " + ExtractingParams.STREAM_TYPE + " parameter."); - } - } - - -} diff -Nru solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingMetadataConstants.java solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingMetadataConstants.java --- solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingMetadataConstants.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingMetadataConstants.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.extraction; - - -/** - * Constants used internally by the {@link ExtractingRequestHandler}. - * - **/ -public interface ExtractingMetadataConstants { - String STREAM_NAME = "stream_name"; - String STREAM_SOURCE_INFO = "stream_source_info"; - String STREAM_SIZE = "stream_size"; - String STREAM_CONTENT_TYPE = "stream_content_type"; -} diff -Nru solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java --- solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingParams.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,142 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.extraction; - - -/** - * The various Solr parameter names to use when extracting content. - * - **/ -public interface ExtractingParams { - - /** - * Map all generated attribute names to field names with lowercase and underscores. - */ - public static final String LOWERNAMES = "lowernames"; - - - /** - * The param prefix for mapping Tika metadata to Solr fields. - *

- * To map a field, add a name like: - *

fmap.title=solr.title
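fmap.Last-Modified=last_modified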
- * - * In this example, the Tika "title" metadata value is added to a Solr field named "solr.title"; the second, hypothetical mapping would likewise route Tika's "Last-Modified" metadata value to a "last_modified" field. - * - * - */ - public static final String MAP_PREFIX = "fmap."; - - /** - * The boost value for a named field. The boost is specified against the mapped field name, i.e. the name after any fmap.* renaming. - *

- * For example - *

-   * fmap.title=solr.title
-   * boost.solr.title=2.5
-   * 
- * will boost the solr.title field for this document by 2.5 - * - */ - public static final String BOOST_PREFIX = "boost."; - - /** - * Pass in literal values to be added to the document, as in - *
-   *  literal.myField=Foo 
-   * 
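- * For instance, passing literal.id=doc1 (an illustrative value) sets the id field of the resulting document directly from the request.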
- * - */ - public static final String LITERALS_PREFIX = "literal."; - - - /** - * Restrict the extracted parts of a document to be indexed - * by passing in an XPath expression. All content that satisfies the XPath expr. - * will be passed to the {@link SolrContentHandler}. - *

- * See Tika's docs for what the extracted document looks like. - *
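- * For example, an expression along the lines of /xhtml:html/xhtml:body/xhtml:div/descendant:node() (shown here for illustration) would restrict indexing to the div elements of the XHTML document that Tika produces. - *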

- * @see #CAPTURE_ELEMENTS */ - public static final String XPATH_EXPRESSION = "xpath"; - - - /** - * Only extract and return the content, do not index it. - */ - public static final String EXTRACT_ONLY = "extractOnly"; - - /** - * Content output format if extractOnly is true. Default is "xml", alternative is "text". - */ - public static final String EXTRACT_FORMAT = "extractFormat"; - - /** - * Capture attributes separately according to the name of the element, instead of just adding them to the string buffer. - */ - public static final String CAPTURE_ATTRIBUTES = "captureAttr"; - - - /** - * Capture the specified elements (and everything below them that isn't captured by some other capture element) separately from the default. This is different - * from the case of passing in an XPath expression. - *

- * The capture field is based on the localName returned to the {@link SolrContentHandler} - * by Tika, not to be confused with the mapped field. The field name can then - * be mapped into the index schema. - *

- * For instance, a Tika document may look like: - *

-   *  <html>
-   *    ...
-   *    <body>
-   *      <p>some text here.  <div>more text</div></p>
-   *      Some more text
-   *    </body>
-   * 
- * By passing in the p tag, you could capture all P tags separately from the rest of the text. - * Thus, in the example, the capture of the P tag would be: "some text here. more text" - * - */ - public static final String CAPTURE_ELEMENTS = "capture"; - - /** - * The type of the stream. If not specified, Tika will use MIME type detection. - */ - public static final String STREAM_TYPE = "stream.type"; - - - /** - * Optional. The file name. If specified, Tika can take this into account while - * guessing the MIME type. - */ - public static final String RESOURCE_NAME = "resource.name"; - - - /** - * Optional. If specified, the prefix will be prepended to the name of any metadata field that is not defined in the schema, making it possible - * to set up a dynamic field to capture such fields automatically. - */ - public static final String UNKNOWN_FIELD_PREFIX = "uprefix"; - - /** - * Optional. If specified and the name of a potential field cannot be determined, the default field specified - * will be used instead. - */ - public static final String DEFAULT_FIELD = "defaultField"; -} diff -Nru solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java --- solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,135 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.extraction; - - -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.common.util.DateUtil; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.core.SolrCore; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.update.processor.UpdateRequestProcessor; -import org.apache.solr.util.plugin.SolrCoreAware; -import org.apache.solr.handler.ContentStreamHandlerBase; -import org.apache.solr.handler.ContentStreamLoader; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.exception.TikaException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.util.Collection; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; - - -/** - * Handler for rich documents such as PDF or Word, or any other file format that Tika handles, where the text must - * first be extracted from the document. - *
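- * One typical invocation (the URL, id, and file name here are illustrative): - * - *   curl "http://localhost:8983/solr/update/extract?literal.id=doc1&commit=true" -F "myfile=@tutorial.html"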

- */ -public class ExtractingRequestHandler extends ContentStreamHandlerBase implements SolrCoreAware { - - private transient static Logger log = LoggerFactory.getLogger(ExtractingRequestHandler.class); - - public static final String CONFIG_LOCATION = "tika.config"; - public static final String DATE_FORMATS = "date.formats"; - - protected TikaConfig config; - - - protected Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS; - protected SolrContentHandlerFactory factory; - - - @Override - public void init(NamedList args) { - super.init(args); - } - - public void inform(SolrCore core) { - if (initArgs != null) { - //if relative, then relative to the config dir; otherwise an absolute path - String tikaConfigLoc = (String) initArgs.get(CONFIG_LOCATION); - if (tikaConfigLoc != null) { - File configFile = new File(tikaConfigLoc); - if (configFile.isAbsolute() == false) { - configFile = new File(core.getResourceLoader().getConfigDir(), configFile.getPath()); - } - try { - config = new TikaConfig(configFile); - } catch (Exception e) { - throw new SolrException(ErrorCode.SERVER_ERROR, e); - } - } else { - try { - config = TikaConfig.getDefaultConfig(); - } catch (TikaException e) { - throw new SolrException(ErrorCode.SERVER_ERROR, e); - } - } - NamedList configDateFormats = (NamedList) initArgs.get(DATE_FORMATS); - if (configDateFormats != null && configDateFormats.size() > 0) { - dateFormats = new HashSet<String>(); - Iterator<Map.Entry<String, Object>> it = configDateFormats.iterator(); - while (it.hasNext()) { - String format = (String) it.next().getValue(); - log.info("Adding Date Format: " + format); - dateFormats.add(format); - } - } - } else { - try { - config = TikaConfig.getDefaultConfig(); - } catch (TikaException e) { - throw new SolrException(ErrorCode.SERVER_ERROR, e); - } - } - factory = createFactory(); - } - - protected SolrContentHandlerFactory createFactory() { - return new SolrContentHandlerFactory(dateFormats); - } - - - protected ContentStreamLoader newLoader(SolrQueryRequest req, UpdateRequestProcessor processor) { - return new ExtractingDocumentLoader(req, processor, config, factory); - } - - // ////////////////////// SolrInfoMBeans methods ////////////////////// - @Override - public String getDescription() { - return "Add/Update Rich document"; - } - - @Override - public String getVersion() { - return "$Revision:$"; - } - - @Override - public String getSourceId() { - return "$Id:$"; - } - - @Override - public String getSource() { - return "$URL:$"; - } -} - - diff -Nru solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java --- solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.extraction; - -import org.apache.tika.metadata.Metadata; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.schema.IndexSchema; - -import java.util.Collection; - - -/** - * Creates the {@link SolrContentHandler} instances used by the {@link ExtractingRequestHandler}, passing the configured date formats along to each handler. - **/ -public class SolrContentHandlerFactory { - protected Collection<String> dateFormats; - - public SolrContentHandlerFactory(Collection<String> dateFormats) { - this.dateFormats = dateFormats; - } - - public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) { - return new SolrContentHandler(metadata, params, schema, - dateFormats); - } -} diff -Nru solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java --- solr-1.4.0+ds1/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,308 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.handler.extraction; - -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.DateUtil; -import org.apache.solr.schema.DateField; -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.schema.SchemaField; -import org.apache.tika.metadata.Metadata; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - -import java.text.DateFormat; -import java.util.*; - - -/** - * The class responsible for handling Tika events and translating them into {@link org.apache.solr.common.SolrInputDocument}s. - * This class is not thread-safe. - *

- *
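- * Tika's SAX events are accumulated into a catch-all buffer, plus one buffer per captured element; {@link #newDocument()} then combines those buffers, the Tika metadata, and any literal.* parameters into the finished document. - *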

- * Users may wish to override this class to provide their own functionality. - * - * @see org.apache.solr.handler.extraction.SolrContentHandlerFactory - * @see org.apache.solr.handler.extraction.ExtractingRequestHandler - * @see org.apache.solr.handler.extraction.ExtractingDocumentLoader - */ -public class SolrContentHandler extends DefaultHandler implements ExtractingParams { - private transient static Logger log = LoggerFactory.getLogger(SolrContentHandler.class); - private SolrInputDocument document; - - private Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS; - - private Metadata metadata; - private SolrParams params; - private StringBuilder catchAllBuilder = new StringBuilder(2048); - private IndexSchema schema; - private Map<String, StringBuilder> fieldBuilders = Collections.emptyMap(); - private LinkedList<StringBuilder> bldrStack = new LinkedList<StringBuilder>(); - - private boolean captureAttribs; - private boolean lowerNames; - private String contentFieldName = "content"; - - private String unknownFieldPrefix = ""; - private String defaultField = ""; - - public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) { - this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS); - } - - - public SolrContentHandler(Metadata metadata, SolrParams params, - IndexSchema schema, Collection<String> dateFormats) { - document = new SolrInputDocument(); - this.metadata = metadata; - this.params = params; - this.schema = schema; - this.dateFormats = dateFormats; - - this.lowerNames = params.getBool(LOWERNAMES, false); - this.captureAttribs = params.getBool(CAPTURE_ATTRIBUTES, false); - this.unknownFieldPrefix = params.get(UNKNOWN_FIELD_PREFIX, ""); - this.defaultField = params.get(DEFAULT_FIELD, ""); - String[] captureFields = params.getParams(CAPTURE_ELEMENTS); - if (captureFields != null && captureFields.length > 0) { - fieldBuilders = new HashMap<String, StringBuilder>(); - for (int i = 0; i < captureFields.length; i++) { - fieldBuilders.put(captureFields[i], new StringBuilder()); - } - } - bldrStack.add(catchAllBuilder); - } - - - /** - * This is called by a consumer when it is ready to deal with a new SolrInputDocument. Overriding - * classes can use this hook to add in or change whatever they deem fit for the document at that time. - * The base implementation adds the metadata as fields, allowing for potential remapping. - * - * @return The {@link org.apache.solr.common.SolrInputDocument}.
- */ - public SolrInputDocument newDocument() { - float boost = 1.0f; - //handle the metadata extracted from the document - for (String name : metadata.names()) { - String[] vals = metadata.getValues(name); - addField(name, null, vals); - } - - //handle the literals from the params - Iterator<String> paramNames = params.getParameterNamesIterator(); - while (paramNames.hasNext()) { - String pname = paramNames.next(); - if (!pname.startsWith(LITERALS_PREFIX)) continue; - - String name = pname.substring(LITERALS_PREFIX.length()); - addField(name, null, params.getParams(pname)); - } - - - //add in the content - addField(contentFieldName, catchAllBuilder.toString(), null); - - //add in the captured content - for (Map.Entry<String, StringBuilder> entry : fieldBuilders.entrySet()) { - if (entry.getValue().length() > 0) { - addField(entry.getKey(), entry.getValue().toString(), null); - } - } - if (log.isDebugEnabled()) { - log.debug("Doc: " + document); - } - return document; - } - - // Naming rules: - // 1) optionally map names to nicenames (lowercase+underscores) - // 2) execute "map" commands - // 3) if resulting field is unknown, map it to a common prefix - private void addField(String fname, String fval, String[] vals) { - if (lowerNames) { - StringBuilder sb = new StringBuilder(); - for (int i=0; i<fname.length(); i++) { - char ch = fname.charAt(i); - if (!Character.isLetterOrDigit(ch)) ch='_'; - else ch=Character.toLowerCase(ch); - sb.append(ch); - } - fname = sb.toString(); - } - - String name = findMappedName(fname); - SchemaField sf = schema.getFieldOrNull(name); - if (sf == null && unknownFieldPrefix.length() > 0) { - name = unknownFieldPrefix + name; - sf = schema.getFieldOrNull(name); - } else if (sf == null && defaultField.length() > 0 && name.equals(Metadata.RESOURCE_NAME_KEY) == false /*let the fall through below handle this*/){ - name = defaultField; - sf = schema.getFieldOrNull(name); - } - - // Arguably we should handle this as a special case. Why? Because unlike basically - // all the other fields in metadata, this one was probably set not by Tika but in - // ExtractingDocumentLoader.load(). You shouldn't have to define a mapping for this - // field just because you specified a resource.name parameter to the handler, should - // you? - if (sf == null && unknownFieldPrefix.length()==0 && Metadata.RESOURCE_NAME_KEY.equals(name)) { - return; - } - - // normalize val params: fold a single-element vals array into fval, so vals is only used when vals.length > 1 - if (vals != null && vals.length==1) { - fval = vals[0]; - vals = null; - } - - // single valued field with multiple values... catenate them.
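- // (a non-multiValued field would otherwise reject the extra values at index time)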
- if (sf != null && !sf.multiValued() && vals != null) { - StringBuilder builder = new StringBuilder(); - boolean first=true; - for (String val : vals) { - if (first) { - first=false; - } else { - builder.append(' '); - } - builder.append(val); - } - fval = builder.toString(); - vals=null; - } - - float boost = getBoost(name); - - if (fval != null) { - document.addField(name, transformValue(fval, sf), boost); - } - - if (vals != null) { - for (String val : vals) { - document.addField(name, transformValue(val, sf), boost); - } - } - - // no value set - throw exception for debugging - // if (vals==null && fval==null) throw new RuntimeException(name + " has no non-null value "); - } - - - @Override - public void startDocument() throws SAXException { - document.clear(); - catchAllBuilder.setLength(0); - for (StringBuilder builder : fieldBuilders.values()) { - builder.setLength(0); - } - bldrStack.clear(); - bldrStack.add(catchAllBuilder); - } - - - @Override - public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { - StringBuilder theBldr = fieldBuilders.get(localName); - if (theBldr != null) { - //we need to switch the currentBuilder - bldrStack.add(theBldr); - } - if (captureAttribs == true) { - for (int i = 0; i < attributes.getLength(); i++) { - addField(localName, attributes.getValue(i), null); - } - } else { - for (int i = 0; i < attributes.getLength(); i++) { - bldrStack.getLast().append(attributes.getValue(i)).append(' '); - } - } - bldrStack.getLast().append(' '); - } - - @Override - public void endElement(String uri, String localName, String qName) throws SAXException { - StringBuilder theBldr = fieldBuilders.get(localName); - if (theBldr != null) { - //pop the stack - bldrStack.removeLast(); - assert (bldrStack.size() >= 1); - } - bldrStack.getLast().append(' '); - } - - - @Override - public void characters(char[] chars, int offset, int length) throws SAXException { - bldrStack.getLast().append(chars, offset, length); - } - - - /** - * Can be used to transform input values based on their {@link org.apache.solr.schema.SchemaField} - *

- * This implementation only formats dates using the {@link org.apache.solr.common.util.DateUtil}. - * - * @param val The value to transform - * @param schFld The {@link org.apache.solr.schema.SchemaField} - * @return The potentially new value. - */ - protected String transformValue(String val, SchemaField schFld) { - String result = val; - if (schFld != null && schFld.getType() instanceof DateField) { - //try to transform the date - try { - Date date = DateUtil.parseDate(val, dateFormats); - DateFormat df = DateUtil.getThreadLocalDateFormat(); - result = df.format(date); - - } catch (Exception e) { - // Let the specific fieldType handle errors - // throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid value: " + val + " for field: " + schFld, e); - } - } - return result; - } - - - /** - * Get the value of any boost factor for the mapped name. - * - * @param name The name of the field to see if there is a boost specified - * @return The boost value - */ - protected float getBoost(String name) { - return params.getFloat(BOOST_PREFIX + name, 1.0f); - } - - /** - * Get the name mapping - * - * @param name The name to check to see if there is a mapping - * @return The new name, if there is one, else name - */ - protected String findMappedName(String name) { - return params.get(MAP_PREFIX + name, name); - } - -} diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java solr-3.3/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java --- solr-1.4.0+ds1/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,338 +0,0 @@ -package org.apache.solr.handler; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.solr.util.AbstractSolrTestCase; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.request.SolrQueryResponse; -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.common.util.ContentStreamBase; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.SolrException; -import org.apache.solr.handler.extraction.ExtractingParams; -import org.apache.solr.handler.extraction.ExtractingRequestHandler; -import org.apache.solr.handler.extraction.ExtractingDocumentLoader; - -import java.util.List; -import java.util.ArrayList; -import java.io.File; - - -/** - * - * - **/ -public class ExtractingRequestHandlerTest extends AbstractSolrTestCase { - @Override - public String getSchemaFile() { - return "schema.xml"; - } - - @Override - public String getSolrConfigFile() { - return "solrconfig.xml"; - } - - - public void testExtraction() throws Exception { - ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract"); - assertTrue("handler is null and it shouldn't be", handler != null); - loadLocal("solr-word.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", - "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", - "fmap.Author", "extractedAuthor", - "fmap.content", "extractedContent", - "literal.id", "one", - "fmap.Last-Modified", "extractedDate" - ); - assertQ(req("title:solr-word"), "//*[@numFound='0']"); - assertU(commit()); - assertQ(req("title:solr-word"), "//*[@numFound='1']"); - - - loadLocal("simple.html", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", - "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", - "fmap.Author", "extractedAuthor", - "fmap.language", "extractedLanguage", - "literal.id", "two", - "fmap.content", "extractedContent", - "fmap.Last-Modified", "extractedDate" - ); - assertQ(req("title:Welcome"), "//*[@numFound='0']"); - assertU(commit()); - assertQ(req("title:Welcome"), "//*[@numFound='1']"); - - - loadLocal("simple.html", - "literal.id","simple2", - "uprefix", "t_", - "lowernames", "true", - "captureAttr", "true", - "fmap.a","t_href", - "fmap.content_type", "abcxyz", // test that lowernames is applied before mapping, and uprefix is applied after mapping - "commit", "true" // test immediate commit - ); - - // test that purposely causes a failure to print out the doc for test debugging - // assertQ(req("q","id:simple2","indent","true"), "//*[@numFound='0']"); - - // test both lowernames and unknown field mapping - //assertQ(req("+id:simple2 +t_content_type:[* TO *]"), "//*[@numFound='1']"); - assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']"); - assertQ(req("+id:simple2 +t_abcxyz:[* TO *]"), "//*[@numFound='1']"); - - // load again in the exact same way, but boost one field - loadLocal("simple.html", - "literal.id","simple3", - "uprefix", "t_", - "lowernames", "true", - "captureAttr", "true", "fmap.a","t_href", - "commit", "true" - - ,"boost.t_href", "100.0" - ); - - assertQ(req("t_href:http"), "//*[@numFound='2']"); - assertQ(req("t_href:http"), "//doc[1]/str[.='simple3']"); - assertQ(req("+id:simple3 +t_content_type:[* TO *]"), "//*[@numFound='1']");//test lowercase and then uprefix - - // test capture - loadLocal("simple.html", - "literal.id","simple4", - "uprefix", "t_", - "capture","p", // capture only what is in the title element - "commit", "true" - ); - assertQ(req("+id:simple4 +t_content:Solr"), "//*[@numFound='1']"); - 
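// capture=p routes the <p> text into its own "p" field; because that field is unknown to the schema, - // uprefix rewrites it to t_p, while the remaining body text still flows to the content field (t_content here) -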
assertQ(req("+id:simple4 +t_p:\"here is some text\""), "//*[@numFound='1']"); - - loadLocal("version_control.xml", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", - "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", - "fmap.Author", "extractedAuthor", - "literal.id", "three", - "fmap.content", "extractedContent", - "fmap.language", "extractedLanguage", - "fmap.Last-Modified", "extractedDate" - ); - assertQ(req("stream_name:version_control.xml"), "//*[@numFound='0']"); - assertU(commit()); - assertQ(req("stream_name:version_control.xml"), "//*[@numFound='1']"); - - - } - - public void testDefaultField() throws Exception { - ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract"); - assertTrue("handler is null and it shouldn't be", handler != null); - try { - loadLocal("simple.html", - "literal.id","simple2", - "lowernames", "true", - "captureAttr", "true", - //"fmap.content_type", "abcxyz", - "commit", "true" // test immediate commit - ); - assertTrue(false); - - } catch (SolrException e) { - //do nothing - } - - - loadLocal("simple.html", - "literal.id","simple2", - ExtractingParams.DEFAULT_FIELD, "defaultExtr",//test that unmapped fields go to the text field when no uprefix is specified - "lowernames", "true", - "captureAttr", "true", - //"fmap.content_type", "abcxyz", - "commit", "true" // test immediate commit - ); - assertQ(req("id:simple2"), "//*[@numFound='1']"); - assertQ(req("defaultExtr:http\\://www.apache.org"), "//*[@numFound='1']"); - - //Test when both uprefix and default are specified. - loadLocal("simple.html", - "literal.id","simple2", - ExtractingParams.DEFAULT_FIELD, "defaultExtr",//test that unmapped fields go to the text field when no uprefix is specified - ExtractingParams.UNKNOWN_FIELD_PREFIX, "t_", - "lowernames", "true", - "captureAttr", "true", - "fmap.a","t_href", - //"fmap.content_type", "abcxyz", - "commit", "true" // test immediate commit - ); - assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']"); - } - - - public void testLiterals() throws Exception { - ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract"); - assertTrue("handler is null and it shouldn't be", handler != null); - //test literal - loadLocal("version_control.xml", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", - "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", - "fmap.Author", "extractedAuthor", - "fmap.content", "extractedContent", - "literal.id", "one", - "fmap.language", "extractedLanguage", - "literal.extractionLiteralMV", "one", - "literal.extractionLiteralMV", "two", - "fmap.Last-Modified", "extractedDate" - - ); - assertQ(req("stream_name:version_control.xml"), "//*[@numFound='0']"); - assertU(commit()); - assertQ(req("stream_name:version_control.xml"), "//*[@numFound='1']"); - - assertQ(req("extractionLiteralMV:one"), "//*[@numFound='1']"); - assertQ(req("extractionLiteralMV:two"), "//*[@numFound='1']"); - - try { - loadLocal("version_control.xml", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", - "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", - "fmap.Author", "extractedAuthor", - "fmap.content", "extractedContent", - "literal.id", "two", - "fmap.language", "extractedLanguage", - "literal.extractionLiteral", "one", - "literal.extractionLiteral", "two", - "fmap.Last-Modified", "extractedDate" - ); - // TODO: original author 
did not specify why an exception should be thrown... how to fix? - // assertTrue("Exception should have been thrown", false); - } catch (SolrException e) { - //nothing to see here, move along - } - - loadLocal("version_control.xml", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", - "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", - "fmap.Author", "extractedAuthor", - "fmap.content", "extractedContent", - "literal.id", "three", - "fmap.language", "extractedLanguage", - "literal.extractionLiteral", "one", - "fmap.Last-Modified", "extractedDate" - ); - assertU(commit()); - assertQ(req("extractionLiteral:one"), "//*[@numFound='1']"); - - } - - - public void testPlainTextSpecifyingMimeType() throws Exception { - ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract"); - assertTrue("handler is null and it shouldn't be", handler != null); - - // Load plain text specifying MIME type: - loadLocal("version_control.txt", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", - "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", - "fmap.Author", "extractedAuthor", - "literal.id", "one", - "fmap.language", "extractedLanguage", - "fmap.content", "extractedContent", - ExtractingParams.STREAM_TYPE, "text/plain" - ); - assertQ(req("extractedContent:Apache"), "//*[@numFound='0']"); - assertU(commit()); - assertQ(req("extractedContent:Apache"), "//*[@numFound='1']"); - } - - public void testPlainTextSpecifyingResourceName() throws Exception { - ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract"); - assertTrue("handler is null and it shouldn't be", handler != null); - - // Load plain text specifying filename - loadLocal("version_control.txt", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer", - "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords", - "fmap.Author", "extractedAuthor", - "literal.id", "one", - "fmap.language", "extractedLanguage", - "fmap.content", "extractedContent", - ExtractingParams.RESOURCE_NAME, "version_control.txt" - ); - assertQ(req("extractedContent:Apache"), "//*[@numFound='0']"); - assertU(commit()); - assertQ(req("extractedContent:Apache"), "//*[@numFound='1']"); - } - - // Note: If you load a plain text file specifying neither MIME type nor filename, extraction will silently fail. 
This is because Tika's - // automatic MIME type detection will fail, and it will default to using an empty-string-returning default parser - - - public void testExtractOnly() throws Exception { - ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract"); - assertTrue("handler is null and it shouldn't be", handler != null); - SolrQueryResponse rsp = loadLocal("solr-word.pdf", ExtractingParams.EXTRACT_ONLY, "true"); - assertTrue("rsp is null and it shouldn't be", rsp != null); - NamedList list = rsp.getValues(); - - String extraction = (String) list.get("solr-word.pdf"); - assertTrue("extraction is null and it shouldn't be", extraction != null); - assertTrue(extraction + " does not contain " + "solr-word", extraction.indexOf("solr-word") != -1); - - NamedList nl = (NamedList) list.get("solr-word.pdf_metadata"); - assertTrue("nl is null and it shouldn't be", nl != null); - Object title = nl.get("title"); - assertTrue("title is null and it shouldn't be", title != null); - assertTrue(extraction.indexOf("<?xml") != -1);//there are two <a> tags, and they get collapsed - } - - - SolrQueryResponse loadLocal(String filename, String... args) throws Exception { - LocalSolrQueryRequest req = (LocalSolrQueryRequest) req(args); - - // TODO: stop using locally defined streams once stream.file and - // stream.body work everywhere - List<ContentStream> cs = new ArrayList<ContentStream>(); - cs.add(new ContentStreamBase.FileStream(new File(filename))); - req.setContentStreams(cs); - return h.queryAndResponse("/update/extract", req); - } - - -} diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/example.html solr-3.3/contrib/extraction/src/test/resources/example.html --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/example.html 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/example.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ - - - Welcome to Solr - - -

- Here is some text -

-
Here is some text in a div
-
This has a link.
-News - - - - diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/simple.html solr-3.3/contrib/extraction/src/test/resources/simple.html --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/simple.html 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/simple.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,12 +0,0 @@ - - - Welcome to Solr - - -

- Here is some text -

-
Here is some text in a div
-
This has a link.
- - diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/protwords.txt solr-3.3/contrib/extraction/src/test/resources/solr/conf/protwords.txt --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/protwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/solr/conf/protwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#use a protected word file to avoid stemming two -#unrelated words to the same base word. -#to test, we will use words that would normally obviously be stemmed. -cats -ridding diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/schema.xml solr-3.3/contrib/extraction/src/test/resources/solr/conf/schema.xml --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/schema.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/solr/conf/schema.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,471 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - text - id - - - - - - - - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml solr-3.3/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,359 +0,0 @@ - - - - - - - - - - - - ${solr.data.dir:./solr/data} - - - - - false - 10 - - - - 32 - 2147483647 - 10000 - 1000 - 10000 - - - false - - - org.apache.lucene.index.LogByteSizeMergePolicy - - - org.apache.lucene.index.ConcurrentMergeScheduler - - 1000 - 10000 - - single - - - - - false - 10 - 32 - 2147483647 - 10000 - - true - - - - - - - - - - - - - - - - - 1024 - - - - - - - - - - - true - - - - - true - - 10 - - - - - - - - - - - - - - - - - - - - - - - - - true - - - - 0.01 - - text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 - - - text^0.2 features_t^1.1 subject^1.4 
title_stemmed^2.0 title^1.5 - - - ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 - - - 3<-1 5<-2 6<90% - - 100 - - - - *:* - 0.01 - - text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 - - - text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 - - - ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 - - - 3<-1 5<-2 6<90% - - 100 - - - - 1000 - 1.4142135 - 12 - foo - - - sqrt 2 - log 10 - - - - - - - - 4 - true - text,name,subject,title,whitetok - - - - - - - 4 - true - text,name,subject,title,whitetok - - - - - - false - - - - - - - - - - 100 - - - - - - 70 - - - - - - - ]]> - ]]> - - - - - - - - - - max-age=30, public - - - - - solr - solrconfig.xml scheam.xml admin-extra.html - - - - prefix-${solr.test.sys.prop2}-suffix - - - - - - diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/stopwords.txt solr-3.3/contrib/extraction/src/test/resources/solr/conf/stopwords.txt --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/stopwords.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/solr/conf/stopwords.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -stopworda -stopwordb diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/synonyms.txt solr-3.3/contrib/extraction/src/test/resources/solr/conf/synonyms.txt --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr/conf/synonyms.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/solr/conf/synonyms.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-a => aa -b => b1 b2 -c => c1,c2 -a\=>a => b\=>b -a\,a => b\,b -foo,bar,baz - -Television,TV,Televisions Binary files /tmp/zX6LM1e92s/solr-1.4.0+ds1/contrib/extraction/src/test/resources/solr-word.pdf and /tmp/DLW8oJ5NGy/solr-3.3/contrib/extraction/src/test/resources/solr-word.pdf differ diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/version_control.txt solr-3.3/contrib/extraction/src/test/resources/version_control.txt --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/version_control.txt 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/version_control.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -Solr Version Control System - -Overview - -The Solr source code resides in the Apache Subversion (SVN) repository. -The command-line SVN client can be obtained here or as an optional package -for cygwin. - -The TortoiseSVN GUI client for Windows can be obtained here. There -are also SVN plugins available for older versions of Eclipse and -IntelliJ IDEA that don't have subversion support already included. - -------------------------------- - -Note: This document is an excerpt from a document Licensed to the -Apache Software Foundation (ASF) under one or more contributor -license agreements. See the XML version (version_control.xml) for -more details. diff -Nru solr-1.4.0+ds1/contrib/extraction/src/test/resources/version_control.xml solr-3.3/contrib/extraction/src/test/resources/version_control.xml --- solr-1.4.0+ds1/contrib/extraction/src/test/resources/version_control.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/extraction/src/test/resources/version_control.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ - - - - - - -
- Solr Version Control System -
- - - -
- Overview -

- The Solr source code resides in the Apache Subversion (SVN) repository. - The command-line SVN client can be obtained here or as an optional package for cygwin. - The TortoiseSVN GUI client for Windows can be obtained here. There - are also SVN plugins available for older versions of Eclipse and - IntelliJ IDEA that don't have subversion support already included. -

-
-

Here is some more text. It contains a link.

-

Text Here

- - -
diff -Nru solr-1.4.0+ds1/contrib/velocity/build.xml solr-3.3/contrib/velocity/build.xml --- solr-1.4.0+ds1/contrib/velocity/build.xml 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/velocity/build.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,125 +0,0 @@ - - - - - - - - - - - - Solritas: Velocity Response Writer - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Tests failed! - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -Nru solr-1.4.0+ds1/contrib/velocity/solr-velocity-pom.xml.template solr-3.3/contrib/velocity/solr-velocity-pom.xml.template --- solr-1.4.0+ds1/contrib/velocity/solr-velocity-pom.xml.template 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/velocity/solr-velocity-pom.xml.template 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ - - - - - 4.0.0 - - - org.apache.solr - solr-parent - @maven_version@ - - - org.apache.solr - solr-velocity - Apache Solr Velocity Extension - @maven_version@ - Apache Solr Velocity Extension adds support for integrating Solr and Velocity - jar - - - - commons-lang - commons-lang - 2.4 - - - commons-beanutils - commons-beanutils - 1.7.0 - - - org.apache.velocity - velocity - 1.6.1 - - - org.apache.velocity - velocity-tools - 2.0-beta3 - - - diff -Nru solr-1.4.0+ds1/contrib/velocity/src/main/java/footer.vm solr-3.3/contrib/velocity/src/main/java/footer.vm --- solr-1.4.0+ds1/contrib/velocity/src/main/java/footer.vm 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/velocity/src/main/java/footer.vm 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -## This template currently lives in the java src/ tree as an example of templates being loaded from the classpath - -#if($params.getBool("debugQuery",false)) - #parse("debug.vm") -#end - -
-Generated by VelocityResponseWriter \ No newline at end of file diff -Nru solr-1.4.0+ds1/contrib/velocity/src/main/java/org/apache/solr/request/PageTool.java solr-3.3/contrib/velocity/src/main/java/org/apache/solr/request/PageTool.java --- solr-1.4.0+ds1/contrib/velocity/src/main/java/org/apache/solr/request/PageTool.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/velocity/src/main/java/org/apache/solr/request/PageTool.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,79 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.request; - -import org.apache.solr.search.DocSlice; -import org.apache.solr.common.SolrDocumentList; - -public class PageTool { - private long start; - private int results_per_page = 10; - private long results_found; - private int page_count; - private int current_page_number; - - public PageTool(SolrQueryRequest request, SolrQueryResponse response) { - String rows = request.getParams().get("rows"); - - if (rows != null) { - results_per_page = new Integer(rows); - } - - Object docs = response.getValues().get("response"); - if (docs != null) { - if (docs instanceof DocSlice) { - DocSlice doc_slice = (DocSlice) docs; - results_found = doc_slice.matches(); - start = doc_slice.offset(); - } else { - SolrDocumentList doc_list = (SolrDocumentList) docs; - results_found = doc_list.getNumFound(); - start = doc_list.getStart(); - } - } - - page_count = (int) Math.ceil(results_found / (double) results_per_page); - current_page_number = (int) Math.ceil(start / (double) results_per_page) + (page_count > 0 ? 1 : 0); - } - - public long getStart() { - return start; - } - - public int getResults_per_page() { - return results_per_page; - } - - public long getResults_found() { - return results_found; - } - - public int getPage_count() { - return page_count; - } - - public int getCurrent_page_number() { - return current_page_number; - } - - public String toString() { - return "Found " + results_found + - " Page " + current_page_number + " of " + page_count + - " Starting at " + start + " per page " + results_per_page; - } -} diff -Nru solr-1.4.0+ds1/contrib/velocity/src/main/java/org/apache/solr/request/SolrParamResourceLoader.java solr-3.3/contrib/velocity/src/main/java/org/apache/solr/request/SolrParamResourceLoader.java --- solr-1.4.0+ds1/contrib/velocity/src/main/java/org/apache/solr/request/SolrParamResourceLoader.java 2009-12-15 10:08:27.000000000 +0000 +++ solr-3.3/contrib/velocity/src/main/java/org/apache/solr/request/SolrParamResourceLoader.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.request; - -import org.apache.velocity.runtime.resource.loader.ResourceLoader; -import org.apache.velocity.runtime.resource.Resource; -import org.apache.velocity.exception.ResourceNotFoundException; -import org.apache.commons.collections.ExtendedProperties; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -public class SolrParamResourceLoader extends ResourceLoader { - private Map templates = new HashMap(); - public SolrParamResourceLoader(SolrQueryRequest request) { - super(); - - // TODO: Consider using content streams, but need a template name associated with each stream - // for now, a custom param convention of template.=