From: Maxious Date: Mon, 03 Dec 2012 07:49:55 +0000 Subject: beginning of docx/pdf scrapers X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=29ff266d09edba92b2e51ee98a7b3035428553e5 --- beginning of docx/pdf scrapers Former-commit-id: 72b18d2f2bae7cfce33fb8639ad1523c7bbcc0a3 --- --- a/couchdb/settee +++ /dev/null --- /dev/null +++ b/couchdb/settee/.travis.yml @@ -1,1 +1,6 @@ +language: php +phps: + - 5.3 + - 5.4 +before_script: cd tests/ --- /dev/null +++ b/couchdb/settee/LICENSE.txt @@ -1,1 +1,9 @@ +(The MIT License) +Copyright (c) 2011 Irakli Nadareishvili + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. --- /dev/null +++ b/couchdb/settee/README.textile @@ -1,1 +1,60 @@ +Inspired by: "CouchRest library for Ruby":http://jchrisa.net/drl/_design/sofa/_list/post/post-page?startkey=%5B%22couchrest__restful_ruby_client_%22%5D and the "couchdb-python":http://packages.python.org/CouchDB/client.html#document library. +h3. 
Server Functions + +# Specify a server: +@$server = new SetteeServer('http://127.0.0.1:5984');@ +# Database API +## Create a database: +@$ret = $server->create_db('irakli_test');@ +## Drop a database: +@$ret = $server->drop_db('irakli_test');@ +## List all databases: +@$ret = $server->list_dbs();@ +## Get a database object +@$db = $server->get_db('irakli_test');@ +# Document API +## Create/Update a document: +@$ret = $db->save($doc);@ +## Retrieve a document: +@$db_doc = $db->get($id);@ +## Determine the latest revision_id for a document: +@$rev = $db->get_rev($id);@ +## Delete a document: +@$db_doc = $db->delete($doc);@ +# Attachments API +## Add content as attachment: +@$db->add_attachment($doc, "foo.txt", "Some text that will be base64 encoded", "text/plain");@ +## Add a file path to be attached: +@$db->add_attachment_file($doc, "foo.pdf", $file_path, "application/pdf");@ +## Add a file path to be attached (mime-type is auto-detected): +@$db->add_attachment_file($doc, "foo.pdf", $file_path);@ +## Full attachment saving example: + $doc = new stdClass(); + $doc->_id = "attachment_doc"; + $file_path = dirname(__FILE__) . "/resources/couch-logo.pdf"; + $this->db->add_attachment_file($doc, "foo.pdf", $file_path, "application/pdf"); + $db_doc = $this->db->save($doc); +## ATTENTION: there is no "load_attachments" method, because when you load a document, all its attachments get loaded with it, as well. 
+# Views API +## Create a new view or save a view: +@$view = $db->save_view("some_design_document_id", "a_view_name", $map_src);@ +@$view = $db->save_view("some_design_document_id", "a_view_name", $map_src, $reduce_src);@ +## Get a view (run query and get results): +@$view = $db->get_view("some_design_document_id", "a_view_name");@ +## Parametrized view: +@$view = $db->get_view("some_design_document_id", "a_view_name", "2009/02/17 21:13:39");@ +## Parametrized view with key range: +@$view = $db->get_view("some_design_document_id", "a_view_name", array("2009/01/30 18:04:11", "2009/02/17 21:13:39"));@ +## Parametrized view with key range, ordered descending: +@$view = $db->get_view("some_design_document_id", "a_view_name", array("2009/01/30 18:04:11", "2009/02/17 21:13:39"), true);@ + + +h3. Requirements +# PHP 5.2 or newer + +h3. Recommended +# PHP 5.3 or newer. With PHP 5.2 following functionality will not work: +## Some unit-tests +## Mime type auto-detection. +# pecl_http --- /dev/null +++ b/couchdb/settee/examples/db.ops.php @@ -1,1 +1,50 @@ +#!/usr/bin/env php + "settee_test_perf_01", + 2 => "settee_test_perf_02", + 3 => "settee_test_perf_03", +); + +print ("creating databases: \n"); + +foreach ($dbs as $db) { + $start = microtime(true); + try { + $ret = $server->create_db($db); + } catch (Exception $e) { + //-- re-throw. this is just for demo + throw $e; + } + $elapsed = microtime(true) - $start; + print("Time elapsed: $elapsed \n"); +} + +$ret = $server->list_dbs(); +print_r($ret); +print ("\n"); + +print ("dropping databases: \n"); + +foreach ($dbs as $db) { + $start = microtime(true); + try { + $ret = $server->drop_db($db); + } catch (Exception $e) { + //-- re-throw. 
this is just for demo + throw $e; + } + $elapsed = microtime(true) - $start; + print("Time elapsed: $elapsed \n"); +} + +$ret = $server->list_dbs(); +print_r($ret); + --- /dev/null +++ b/couchdb/settee/examples/doc.ops.php @@ -1,1 +1,40 @@ +#!/usr/bin/env php +get_db('irakli'); + +try { + $server->create_db($db); +} catch (Exception $e) { + print_r("database irakli already exists! \n"); +} + +$doc = new StdClass(); +$doc->firstName = "Irakli"; +$doc->lastName = "Nadareishvili"; +$doc->IQ = 200; +$doc->hobbies = array("skiing", "swimming"); +$doc->pets = array ("whitey" => "labrador", "mikey" => "pug"); + +// Should work with json string as well: +//$doc = '{"firstName":"irakli","lastName":"Nadareishvili","IQ":200,"hobbies":["skiing","swimming"],"pets":{"whitey":"labrador","mikey":"pug"}}'; + +$doc = $db->save($doc); +print_r($doc); + +$doc = $db->get($doc->_id); +print_r($doc); + +$doc->firstName = "Ika"; +$doc = $db->save($doc); +print_r($doc); + +$db->delete($doc); + + + --- /dev/null +++ b/couchdb/settee/src/classes/SetteeDatabase.class.php @@ -1,1 +1,310 @@ - +conn_url = $conn_url; + $this->dbname = $dbname; + $this->rest_client = SetteeRestClient::get_instance($this->conn_url); + } + + + /** + * Get UUID from CouchDB + * + * @return + * CouchDB-generated UUID string + * + */ + function gen_uuid() { + $ret = $this->rest_client->http_get('_uuids'); + return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking + } + + /** + * Create or update a document database + * + * @param $document + * PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically. + * + *

If $document has an "_id" property set, it will be used as the document's unique id (even for a "create" operation). + * If "_id" is missing, CouchDB will be used to generate a UUID. + * + *

If $document has a "_rev" property (revision), document will be updated, rather than creating a new document. + * You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be + * one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but + * not provide "_id" since that is an invalid input. + * + * @param $allowRevAutoDetection + * Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision + * for a document and use it. This option is "false" by default because it involves an extra http HEAD request and + * therefore can make save() operation slightly slower if such auto-detection is not required. + * + * @return + * document object with the database id (uuid) and revision attached; + * + * @throws SetteeCreateDatabaseException + */ + function save($document, $allowRevAutoDetection = false) { + if (is_string($document)) { + $document = json_decode($document); + } + + // Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter) + if(is_array($document)) { + $document = (object) $document; + } + + if (empty($document->_id) && empty($document->_rev)) { + $id = $this->gen_uuid(); + } + elseif (empty($document->_id) && !empty($document->_rev)) { + throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id"); + } + else { + $id = $document->_id; + + if ($allowRevAutoDetection) { + try { + $rev = $this->get_rev($id); + } catch (SetteeRestClientException $e) { + // auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error + } + if (!empty($rev)) { + $document->_rev = $rev; + } + } + } + + $full_uri = $this->dbname . "/" . 
$this->safe_urlencode($id); + $document_json = json_encode($document, JSON_NUMERIC_CHECK); + + $ret = $this->rest_client->http_put($full_uri, $document_json); + + $document->_id = $ret['decoded']->id; + $document->_rev = $ret['decoded']->rev; + + return $document; + } + + /** + * @param $doc + * @param $name + * @param $content + * Content of the attachment in a string-buffer format. This function will automatically base64-encode content for + * you, so you don't have to do it. + * @param $mime_type + * Optional. Will be auto-detected if not provided + * @return void + */ + public function add_attachment($doc, $name, $content, $mime_type = null) { + if (empty($doc->_attachments) || !is_object($doc->_attachments)) { + $doc->_attachments = new stdClass(); + } + + if (empty($mime_type)) { + $mime_type = $this->rest_client->content_mime_type($content); + } + + $doc->_attachments->$name = new stdClass(); + $doc->_attachments->$name->content_type = $mime_type; + $doc->_attachments->$name->data = base64_encode($content); + } + + /** + * @param $doc + * @param $name + * @param $file + * Full path to a file (e.g. as returned by PHP's realpath function). + * @param $mime_type + * Optional. Will be auto-detected if not provided + * @return void + */ + public function add_attachment_file($doc, $name, $file, $mime_type = null) { + $content = file_get_contents($file); + $this->add_attachment($doc, $name, $content, $mime_type); + } + + /** + * + * Retrieve a document from CouchDB + * + * @throws SetteeWrongInputException + * + * @param $id + * Unique ID (usually: UUID) of the document to be retrieved. + * @return + * database document in PHP object format. + */ + function get($id) { + if (empty($id)) { + throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid."); + } + + $full_uri = $this->dbname . "/" . 
$this->safe_urlencode($id); +$full_uri = str_replace("%3Frev%3D","?rev=",$full_uri); + $ret = $this->rest_client->http_get($full_uri); + return $ret['decoded']; + } + + /** + * + * Get the latest revision of a document with document id: $id in CouchDB. + * + * @throws SetteeWrongInputException + * + * @param $id + * Unique ID (usually: UUID) of the document to be retrieved. + * @return + * database document in PHP object format. + */ + function get_rev($id) { + if (empty($id)) { + throw new SetteeWrongInputException("Error: Can't query a document without a uuid."); + } + + $full_uri = $this->dbname . "/" . $this->safe_urlencode($id); + $headers = $this->rest_client->http_head($full_uri); + if (empty($headers['Etag'])) { + throw new SetteeRestClientException("Error: could not retrieve revision. Server unexpectedly returned empty Etag"); + } + $etag = str_replace('"', '', $headers['Etag']); + return $etag; + } + + /** + * Delete a document + * + * @param $document + * a PHP object or JSON representation of the document that has _id and _rev fields. + * + * @return void + */ + function delete($document) { + if (!is_object($document)) { + $document = json_decode($document); + } + + $full_uri = $this->dbname . "/" . $this->safe_urlencode($document->_id) . "?rev=" . $document->_rev; + $this->rest_client->http_delete($full_uri); + } + + + /*----------------- View-related functions --------------*/ + + /** + * Create a new view or update an existing one. + * + * @param $design_doc + * @param $view_name + * @param $map_src + * Source code of the map function in Javascript + * @param $reduce_src + * Source code of the reduce function in Javascript (optional) + * @return void + */ + function save_view($design_doc, $view_name, $map_src, $reduce_src = null) { + $obj = new stdClass(); + $obj->_id = "_design/" . 
urlencode($design_doc); + $view_name = urlencode($view_name); + $obj->views->$view_name->map = $map_src; + if (!empty($reduce_src)) { + $obj->views->$view_name->reduce = $reduce_src; + } + + // allow safe updates (even if slightly slower due to extra: rev-detection check). + return $this->save($obj, true); + } + + /** + * Create a new view or update an existing one. + * + * @param $design_doc + * @param $view_name + * @param $key + * key parameter to a view. Can be a single value or an array (for a range). If passed an array, function assumes + * that first element is startkey, second: endkey. + * @param $descending + * return results in descending order. Please don't forget that if you are using a startkey/endkey, when you change + * order you also need to swap startkey and endkey values! + * + * @return void + */ + function get_view($design_doc, $view_name, $key = null, $descending = false, $limit = false) { + $id = "_design/" . urlencode($design_doc); + $view_name = urlencode($view_name); + $id .= "/_view/$view_name"; + + $data = array(); + if (!empty($key)) { + if (is_string($key)) { + $data = "key=" . '"' . $key . '"'; + } + elseif (is_array($key)) { + list($startkey, $endkey) = $key; + $data = "startkey=" . '"' . $startkey . '"&' . "endkey=" . '"' . $endkey . '"'; + } + + if ($descending) { + $data .= "&descending=true"; + } + if ($limit) { + $data .= "&limit=".$limit; + } + } + + + + if (empty($id)) { + throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid."); + } + + $full_uri = $this->dbname . "/" . $this->safe_urlencode($id); +$full_uri = str_replace("%253Fgroup%253D","?group=",$full_uri); +$full_uri = str_replace("%253Flimit%253D","?limit=",$full_uri); + $ret = $this->rest_client->http_get($full_uri, $data); + return $ret['decoded']; + + } + + /** + * @param $id + * @return + * return a properly url-encoded id. + */ + private function safe_urlencode($id) { + //-- System views like _design can have "/" in their URLs. 
+ $id = rawurlencode($id); + if (substr($id, 0, 1) == '_') { + $id = str_replace('%2F', '/', $id); + } + return $id; + } + + /** Getter for a database name */ + function get_name() { + return $this->dbname; + } + +} --- /dev/null +++ b/couchdb/settee/src/classes/SetteeRestClient.class.php @@ -1,1 +1,246 @@ - +base_url = $base_url; + + $curl = curl_init(); + curl_setopt($curl, CURLOPT_USERAGENT, "Settee CouchDB Client/1.0"); + curl_setopt($curl, CURLOPT_HTTPHEADER, array('Content-Type: application/json')); + curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($curl, CURLOPT_HEADER, 0); + curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1); + curl_setopt($curl, CURLOPT_TIMEOUT_MS, self::HTTP_TIMEOUT); + curl_setopt($curl, CURLOPT_FORBID_REUSE, false); // Connection-pool for CURL + + $this->curl = $curl; + + } + + /** + * Class destructor cleans up any resources + */ + function __destruct() { + curl_close($this->curl); + } + + /** + * HTTP HEAD + * + * @return + * Raw HTTP Headers of the response. + * + * @see: http://www.php.net/manual/en/context.params.php + * + */ + function http_head($uri) { + curl_setopt($this->curl, CURLOPT_HEADER, 1); + + $full_url = $this->get_full_url($uri); + curl_setopt($this->curl, CURLOPT_URL, $full_url); + curl_setopt($this->curl, CURLOPT_CUSTOMREQUEST, 'HEAD'); + curl_setopt($this->curl, CURLOPT_NOBODY, true); + + + $response = curl_exec($this->curl); + // Restore default values + curl_setopt($this->curl, CURLOPT_NOBODY, false); + curl_setopt($this->curl, CURLOPT_HEADER, false); + + $resp_code = curl_getinfo($this->curl, CURLINFO_HTTP_CODE); + if ($resp_code == 404 ) { + throw new SetteeRestClientException("Couch document not found at: '$full_url'"); + } + + if (function_exists('http_parse_headers')) { + $headers = http_parse_headers($response); + } + else { + $headers = $this->_http_parse_headers($response); + } + + return $headers; + } + + /** + * Backup PHP impl. 
for when PECL http_parse_headers() function is not available + * + * @param $header + * @return array + * @source http://www.php.net/manual/en/function.http-parse-headers.php#77241 + */ + private function _http_parse_headers( $header ) { + $retVal = array(); + $fields = explode("\r\n", preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', $header)); + foreach( $fields as $field ) { + if( preg_match('/([^:]+): (.+)/m', $field, $match) ) { + $match[1] = preg_replace('/(?<=^|[\x09\x20\x2D])./e', 'strtoupper("\0")', strtolower(trim($match[1]))); + if( isset($retVal[$match[1]]) ) { + $retVal[$match[1]] = array($retVal[$match[1]], $match[2]); + } else { + $retVal[$match[1]] = trim($match[2]); + } + } + } + return $retVal; + } + + /** + * HTTP GET + */ + function http_get($uri, $data = array()) { + $data = (is_array($data)) ? http_build_query($data) : $data; + if (!empty($data)) { + $uri .= "?$data"; + } + return $this->http_request('GET', $uri); + } + + /** + * HTTP PUT + */ + function http_put($uri, $data = array()) { + return $this->http_request('PUT', $uri, $data); + } + + /** + * HTTP DELETE + */ + function http_delete($uri, $data = array()) { + return $this->http_request('DELETE', $uri, $data); + } + + /** + * Generic implementation of a HTTP Request. + * + * @param $http_method + * @param $uri + * @param array $data + * @return + * an array containing json and decoded versions of the response. + */ + private function http_request($http_method, $uri, $data = array()) { + $data = (is_array($data)) ? http_build_query($data) : $data; + + if (!empty($data)) { + curl_setopt($this->curl, CURLOPT_HTTPHEADER, array('Content-Length: ' . 
strlen($data))); + curl_setopt($this->curl, CURLOPT_POSTFIELDS, $data); + } + + curl_setopt($this->curl, CURLOPT_URL, $this->get_full_url($uri)); + curl_setopt($this->curl, CURLOPT_CUSTOMREQUEST, $http_method); + + $response = curl_exec($this->curl); + $response_decoded = $this->decode_response($response); + $response = array('json' => $response, 'decoded'=>$response_decoded); + + $this->check_status($response,$uri); + + return $response; + } + + /** + * Check http status for safe return codes + * + * @throws SetteeRestClientException + */ + private function check_status($response,$uri) { + $resp_code = curl_getinfo($this->curl, CURLINFO_HTTP_CODE); + + if ($resp_code < 199 || $resp_code > 399 || !empty($response['decoded']->error)) { + $msg = "CouchDB returned: \"HTTP 1.1. $resp_code\". ERROR: " . $response['json'] . $uri; + throw new SetteeRestClientException($msg); + } + } + + /** + * @param $path + * Full path to a file (e.g. as returned by PHP's realpath function). + * @return void + */ + public function file_mime_type ($path) { + $ftype = 'application/octet-stream'; + + if (function_exists("finfo_file")) { + $finfo = new finfo(FILEINFO_MIME_TYPE | FILEINFO_SYMLINK); + $fres = $finfo->file($path); + if (is_string($fres) && !empty($fres)) { + $ftype = $fres; + } + } + + return $ftype; + } + + /** + * @param $content + * content of a file in a string buffer format. 
+ * @return void + */ + public function content_mime_type ($content) { + $ftype = 'application/octet-stream'; + + if (function_exists("finfo_file")) { + $finfo = new finfo(FILEINFO_MIME_TYPE | FILEINFO_SYMLINK); + $fres = $finfo->buffer($content); + if (is_string($fres) && !empty($fres)) { + $ftype = $fres; + } + } + + return $ftype; + } + + + /** + * + * @param $json + * json-encoded response from CouchDB + * + * @return + * decoded PHP object + */ + private function decode_response($json) { + return json_decode($json); + } + + /** + * Get full URL from a partial one + */ + private function get_full_url($uri) { + // We do not want "/", "?", "&" and "=" separators to be encoded!!! + $uri = str_replace(array('%2F', '%3F', '%3D', '%26'), array('/', '?', '=', '&'), urlencode($uri)); + return $this->base_url . '/' . $uri; + } +} + +class SetteeRestClientException extends Exception {} --- /dev/null +++ b/couchdb/settee/src/classes/SetteeServer.class.php @@ -1,1 +1,106 @@ +conn_url = rtrim($conn_url, ' /'); + $this->rest_client = SetteeRestClient::get_instance($this->conn_url); + } + + /** + * Create database + * + * @param $db + * Either a database object or a String name of the database. + * + * @return + * json string from the server. + * + * @throws SetteeCreateDatabaseException + */ + function create_db($db) { + if ($db instanceof SetteeDatabase) { + $db = $db->get_name(); + } + $ret = $this->rest_client->http_put($db); + if (!empty($ret['decoded']->error)) { + throw new SetteeDatabaseException("Could not create database: " . $ret["json"]); + } + return $ret['decoded']; + } + + /** + * Drop database + * + * @param $db + * Either a database object or a String name of the database. + * + * @return + * json string from the server. 
+ * + * @throws SetteeDropDatabaseException + */ + function drop_db($db) { + if ($db instanceof SetteeDatabase) { + $db = $db->get_name(); + } + $ret = $this->rest_client->http_delete($db); + if (!empty($ret['decoded']->error)) { + throw new SetteeDatabaseException("Could not create database: " . $ret["json"]); + } + return $ret['decoded']; + } + + /** + * Instantiate a database object + * + * @param $dbname + * name of the newly created database + * + * @return SetteeDatabase + * new SetteeDatabase instance. + */ + function get_db($dbname) { + return new SetteeDatabase($this->conn_url, $dbname); + } + + + /** + * Return an array containing all databases + * + * @return Array + * an array of database names in the CouchDB instance + */ + function list_dbs() { + $ret = $this->rest_client->http_get('_all_dbs'); + if (!empty($ret['decoded']["error"])) { + throw new SetteeDatabaseException("Could not get list of databases: " . $ret["json"]); + } + return $ret['decoded']; + } + +} + +class SetteeServerErrorException extends Exception {} +class SetteeDatabaseException extends Exception {} +class SetteeWrongInputException extends Exception {} --- /dev/null +++ b/couchdb/settee/src/settee.php @@ -1,1 +1,6 @@ + sudo upgrade pear + > sudo pear channel-discover pear.phpunit.de + > sudo pear install phpunit/PHPUnit +2. You need PHP 5.3.2 or later to run some tests that deal with private or protected methods. If you use an earlier + version of PHP, these tests will be skipped. + +3. Run all tests with: + > phpunit . 
--- /dev/null +++ b/couchdb/settee/tests/SetteeDatabaseTest.php @@ -1,1 +1,281 @@ - +db = $this->server->get_db($dbname); + $this->server->create_db($this->db); + } + + public function test_document_lifecycle_objectbased() { + $doc = new StdClass(); + $doc->firstName = "Irakli"; + $doc->lastName = "Nadareishvili"; + $doc->IQ = 200; + $doc->hobbies = array("skiing", "swimming"); + $doc->pets = array ("whitey" => "labrador", "mikey" => "pug"); + + $doc = $this->db->save($doc); + $this->assertTrue(!empty($doc->_id) && !empty($doc->_rev), "Document creation success [object-based]"); + + $_rev = $doc->_rev; + $doc = $this->db->get($doc->_id); + $this->assertEquals($_rev, $doc->_rev, "Document retrieval success [object-based] test"); + + $doc->firstName = "Ika"; + $db_doc = $this->db->save($doc); + $this->assertEquals($doc->firstName, $db_doc->firstName, "Document update success [object-based]"); + + $this->db->delete($doc); + + + try { + $doc = $this->db->get($doc->_id); + } catch (SetteeRestClientException $e) { + // we expect exception to fire, so this is good. + return; + } + + $this->fail('Document still available for retrieval after being deleted. [object-based]'); + } + + // Should work with json string as well: + // + + + public function test_document_lifecycle_jsonbased() { + $doc = '{"firstName":"Irakli","lastName":"Nadareishvili","IQ":200,"hobbies":["skiing","swimming"],"pets":{"whitey":"labrador","mikey":"pug"}}'; + + $doc = $this->db->save($doc); + $this->assertTrue(!empty($doc->_id) && !empty($doc->_rev), "Document creation success [json-based]"); + + $_rev = $doc->_rev; + + $db_doc = $this->db->get($doc->_id); + $this->assertEquals($_rev, $db_doc->_rev, "Document retrieval success [json-based] test"); + + $doc = '{'; + $doc .= '"_id":"' . $db_doc->_id . '",'; + $doc .= '"_rev":"' . $db_doc->_rev . 
'",'; + $doc .= '"firstName":"Ika","lastName":"Nadareishvili","IQ":200,"hobbies":["skiing","swimming"],"pets":{"whitey":"labrador","mikey":"pug"}}'; + + $orig_doc = json_decode($doc); + $db_doc = $this->db->save($doc); + $this->assertEquals($orig_doc->firstName, $db_doc->firstName, "Document update success [json-based]"); + + $doc = '{'; + $doc .= '"_id":"' . $db_doc->_id . '",'; + $doc .= '"_rev":"' . $db_doc->_rev . '",'; + $doc .= '"firstName":"Ika","lastName":"Nadareishvili","IQ":200,"hobbies":["skiing","swimming"],"pets":{"whitey":"labrador","mikey":"pug"}}'; + + $this->db->delete($doc); + + try { + $doc = $this->db->get($db_doc->_id); + } catch (SetteeRestClientException $e) { + // we expect exception to fire, so this is good. + return; + } + + $this->fail('Document still available for retrieval after being deleted. [object-based]'); + } + + public function test_invalid_document() { + $doc = 12345; + try { + $doc = $this->db->save($doc); + } catch (SetteeRestClientException $e) { + // we expect exception to fire, so this is good. 
+ return; + } + + $this->fail('Document saved with invalid format'); + } + + public function test_get_rev() { + $doc = new stdClass(); + $doc->_id = "some_fixed_id"; + $doc = $this->db->save($doc); + + $_rev = $doc->_rev; + + $db_rev = $this->db->get_rev($doc->_id); + $this->assertEquals($_rev, $db_rev, "Document Revision retrieval success"); + + // _rev is now attached to this object due to last ->save() call + $doc->_id = "some_fixed_id"; + $doc->title = "Some Fixed ID"; + $doc = $this->db->save($doc); + + $_rev = $doc->_rev; + + $db_rev = $this->db->get_rev($doc->_id); + $this->assertEquals($_rev, $db_rev, "Document Revision retrieval success after re-save"); + + } + + public function test_save_auto_revision_detection() { + $doc = new stdClass(); + $doc->_id = "some_fixed_id"; + $this->db->save($doc); + + $doc = new stdClass(); + $doc->_id = "some_fixed_id"; + $doc->extra_field = "some other value"; + + $new_doc = $this->db->save($doc, true); + $this->assertEquals ($new_doc->extra_field, "some other value", "Testing auto-rev detection by save method"); + } + + public function test_inline_attachment_json() { + $doc = '{ + "_id":"attachment_doc", + "_attachments": + { + "foo.txt": + { + "content_type":"text\/plain", + "data": "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=" + } + } + }'; + $db_doc = $this->db->save($doc); + $this->assertTrue(is_object($db_doc->_attachments), "Inline attachment save successful [json-based]"); + } + + public function test_inline_attachment_obj_content() { + $doc = new stdClass(); + $doc->_id = "attachment_doc"; + $this->db->add_attachment($doc, "foo.txt", "This is some text to be encoded", "text/plain"); + $db_doc = $this->db->save($doc); + $this->assertTrue(is_object($db_doc->_attachments), "Inline attachment save successful [object-based]"); + + $doc = new stdClass(); + $doc->_id = "attachment_doc_autodetect"; + $this->db->add_attachment($doc, "foo.txt", "This is some other text to be encoded"); + $db_doc = $this->db->save($doc); + 
$this->assertTrue(is_object($db_doc->_attachments), "Inline attachment save successful [object-based, mime auto-detection]"); + } + + public function test_inline_attachment_obj_file() { + $doc = new stdClass(); + $doc->_id = "attachment_doc"; + $file_path = dirname(__FILE__) . "/resources/couch-logo.pdf"; + $this->db->add_attachment_file($doc, "foo.pdf", $file_path, "application/pdf"); + $db_doc = $this->db->save($doc); + $this->assertTrue(is_object($db_doc->_attachments), "Inline attachment of file successful"); + + $doc = new stdClass(); + $doc->_id = "attachment_doc_autodetect"; + $file_path = dirname(__FILE__) . "/resources/couch-logo.pdf"; + $this->db->add_attachment_file($doc, "foo.pdf", $file_path); + $db_doc = $this->db->save($doc); + $this->assertTrue(is_object($db_doc->_attachments), "Inline attachment of file successful w/ mime type auto-detection"); + } + + public function test_view_lifecycle() { + $this->_create_some_sample_docs(); + + $map_src = <<db->save_view("foo_views", "bar_view", $map_src); + $this->assertEquals("_design/foo_views", $view->_id, "View Creation Success"); + + $view = $this->db->get_view("foo_views", "bar_view"); + $this->assertEquals(3, $view->total_rows, "Running a View Success"); + + $map_src = <<db->save_view("foo_views", "bar_view", $map_src); + $this->assertEquals("_design/foo_views", $view->_id, "View Update Success"); + + $view = $this->db->get_view("foo_views", "bar_view"); + $this->assertEquals("Well hello and welcome to my new blog...", $view->rows[0]->value->body, "Running a View Success (after update)"); + + $view = $this->db->get_view("foo_views", "bar_view", "2009/02/17 21:13:39"); + $this->assertEquals("Bought a Cat", $view->rows[0]->value->title, "Running a Parametrized View"); + + $view = $this->db->get_view("foo_views", "bar_view", array("2009/01/30 18:04:11", "2009/02/17 21:13:39")); + $this->assertEquals("Biking", $view->rows[0]->value->title, "Running a Parametrized View with range"); + + $view = 
$this->db->get_view("foo_views", "bar_view", array("2009/02/17 21:13:39", "2009/01/30 18:04:11"), true); + $this->assertEquals("Bought a Cat", $view->rows[0]->value->title, "Running a Parametrized View with range, descending"); + $this->assertEquals(2, count($view->rows), "Running a Parametrized View with range, descending [count]"); + +} + + function test_two_views_in_a_design_doc() { + + $map_src = <<db->save_view("a_settee_design_doc", "foo_view", $map_src); + $this->assertTrue(isset($view->views->foo_view), "View1 Creation Success"); + + $view = $this->db->save_view("a_settee_design_doc", "bar_view", $map_src); + $this->assertTrue(isset($view->views->bar_view), "View2 Creation Success"); + } + + /** + * Create some sample docs for running tests on them. + * + *

This sample was taken from a wonderful book: + * CouchDB: The Definitive Guide (Animal Guide) by J. Chris Anderson, Jan Lehnardt and Noah Slater + * http://www.amazon.com/CouchDB-Definitive-Guide-Relax-Animal/dp/0596155891/ref=sr_1_1?ie=UTF8&qid=1311533443&sr=8-1 + * + * @return void + */ + private function _create_some_sample_docs() { + $doc = new stdClass(); + $doc->_id = "biking"; + $doc->title = "Biking"; + $doc->body = "My biggest hobby is mountainbiking"; + $doc->date = "2009/01/30 18:04:11"; + $this->db->save($doc); + + $doc = new stdClass(); + $doc->_id = "bought-a-cat"; + $doc->title = "Bought a Cat"; + $doc->body = "I went to the the pet store earlier and brought home a little kitty..."; + $doc->date = "2009/02/17 21:13:39"; + $this->db->save($doc); + + $doc = new stdClass(); + $doc->_id = "hello-world"; + $doc->title = "Hello World"; + $doc->body = "Well hello and welcome to my new blog..."; + $doc->date = "2009/01/15 15:52:20"; + $this->db->save($doc); + } + + public function tearDown() { + $ret = $this->server->drop_db($this->db); + } + +} + + --- /dev/null +++ b/couchdb/settee/tests/SetteeRestClientTest.php @@ -1,1 +1,90 @@ +rest_client = SetteeRestClient::get_instance($this->db_url); + } + + public function test_get_full_url() { + + //-- Can't run this test in PHP versions earlier than 5.3.2, which do not support ReflectionMethod class. + if (!class_exists('ReflectionMethod')) { + return; + } + + //-- Prepare for testing the private full_url_method method. + $get_full_url_method = new ReflectionMethod('SetteeRestClient', 'get_full_url'); + $get_full_url_method->setAccessible(TRUE); + + $uri = 'irakli/26cede9ab9cd8fcd67895eb05200d1ea'; + //-- Equivalent to: $calc = $this->rest_client->get_full_url($uri); but for a private method. + $calc = $get_full_url_method->invokeArgs($this->rest_client, array($uri)); + //-- + $expected = $this->db_url . 
'/irakli/26cede9ab9cd8fcd67895eb05200d1ea'; + $this->assertEquals($expected, $calc, "Full URL Generation with DB and ID"); + + $uri = 'irakli/26cede9ab9cd8fcd67895eb05200d1ea?rev=2-21587f7dffc43b4100f40168f309a267'; + $calc = $get_full_url_method->invokeArgs($this->rest_client, array($uri)); + $expected = $this->db_url . '/irakli/26cede9ab9cd8fcd67895eb05200d1ea?rev=2-21587f7dffc43b4100f40168f309a267'; + $this->assertEquals($expected, $calc, "Full URL Generation with DB, ID and Single Query Parameter"); + + $uri = 'irakli/26cede9ab9cd8fcd67895eb05200d1ea?rev=2-21587f7dffc43b4100f40168f309a267&second=foo'; + $calc = $get_full_url_method->invokeArgs($this->rest_client, array($uri)); + $expected = $this->db_url . '/irakli/26cede9ab9cd8fcd67895eb05200d1ea?rev=2-21587f7dffc43b4100f40168f309a267&second=foo'; + $this->assertEquals($expected, $calc, "Full URL Generation with DB, ID and Two Query Parameters"); + + } + + public function test_file_mime_type() { + + $type = $this->rest_client->file_mime_type(dirname(__FILE__) . "/resources/couch-logo.jpg"); + $this->assertEquals("image/jpeg", $type, "Jpeg Mime Type Detection"); + + $type = $this->rest_client->file_mime_type(dirname(__FILE__) . "/resources/couch-logo.pdf"); + $this->assertEquals("application/pdf", $type, "PDF Mime Type Detection"); + + + $type = $this->rest_client->file_mime_type(dirname(__FILE__) . "/resources/couch-logo.png"); + $this->assertEquals("image/png", $type, "PNG Mime Type Detection"); + + $type = $this->rest_client->file_mime_type(dirname(__FILE__) . "/resources/couch-tag.ini"); + $this->assertEquals("text/plain", $type, "Text Mime Type Detection"); + + $type = $this->rest_client->file_mime_type(dirname(__FILE__) . "/resources/couch-tag.xml"); + $this->assertEquals("application/xml", $type, "XML Mime Type Detection"); + } + + public function test_content_mime_type() { + $content = file_get_contents(dirname(__FILE__) . 
"/resources/couch-logo.jpg"); + $type = $this->rest_client->content_mime_type($content); + $this->assertEquals("image/jpeg", $type, "Jpeg Mime Type Detection"); + + $content = file_get_contents(dirname(__FILE__) . "/resources/couch-logo.pdf"); + $type = $this->rest_client->content_mime_type($content); + $this->assertEquals("application/pdf", $type, "PDF Mime Type Detection"); + + $content = file_get_contents(dirname(__FILE__) . "/resources/couch-logo.png"); + $type = $this->rest_client->content_mime_type($content); + $this->assertEquals("image/png", $type, "PNG Mime Type Detection"); + + $content = file_get_contents(dirname(__FILE__) . "/resources/couch-tag.ini"); + $type = $this->rest_client->content_mime_type($content); + $this->assertEquals("text/plain", $type, "Text Mime Type Detection"); + + $content = file_get_contents(dirname(__FILE__) . "/resources/couch-tag.xml"); + $type = $this->rest_client->content_mime_type($content); + $this->assertEquals("application/xml", $type, "XML Mime Type Detection"); + } + + + +} + + --- /dev/null +++ b/couchdb/settee/tests/SetteeServerTest.php @@ -1,1 +1,43 @@ +dbname = "settee_tests_" . 
md5(microtime(true)); + } + + public function test_database_lifecycle_namebased() { + $db = $this->server->get_db($this->dbname); + $ret = $this->server->create_db($this->dbname); + $this->assertTrue($ret->ok, "Database Creation Success Response [name-based]"); + + $database_list = $this->server->list_dbs(); + $this->assertTrue(is_array($database_list) && in_array($this->dbname, $database_list), + "Verifying Database in the List on the Server [name-based]"); + + $ret = $this->server->drop_db($this->dbname); + $this->assertTrue($ret->ok, "Database Deletion Success Response [name-based]"); + } + + public function test_database_lifecycle_objectbased() { + $db = $this->server->get_db($this->dbname); + $ret = $this->server->create_db($db); + $this->assertTrue($ret->ok, "Database Creation Success Response [object-based]"); + + $database_list = $this->server->list_dbs(); + $this->assertTrue(is_array($database_list) && in_array($this->dbname, $database_list), + "Verifying Database in the List on the Server [object-based]"); + + $ret = $this->server->drop_db($db); + $this->assertTrue($ret->ok, "Database Deletion Success Response [object-based]"); + } + +} + + --- /dev/null +++ b/couchdb/settee/tests/SetteeTestCase.class.php @@ -1,1 +1,20 @@ +db_url = isset($GLOBALS['db_url']) ? $GLOBALS['db_url'] : 'http://127.0.0.1:5984'; + $this->db_user = isset($GLOBALS['db_user']) ? $GLOBALS['db_user'] : 'admin'; + $this->db_pass = isset($GLOBALS['db_pass']) ? $GLOBALS['db_pass'] : 'admin'; + $this->server = new SetteeServer($this->db_url); + } + +} --- /dev/null +++ b/couchdb/settee/tests/phpunitConfig.xml @@ -1,1 +1,8 @@ + + + + + + + --- /dev/null +++ b/couchdb/settee/tests/resources/couch-tag.ini @@ -1,1 +1,2 @@ +Couchdb=relax --- /dev/null +++ b/couchdb/settee/tests/resources/couch-tag.xml @@ -1,1 +1,5 @@ + + +

CouchDB - Relax
+ --- a/documents/genericScrapers.py +++ b/documents/genericScrapers.py @@ -11,6 +11,19 @@ from datetime import * import codecs +from StringIO import StringIO + +from docx import * +from lxml import etree +import zipfile + +from pdfminer.pdfparser import PDFDocument, PDFParser +from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf +from pdfminer.pdfdevice import PDFDevice, TagExtractor +from pdfminer.converter import TextConverter +from pdfminer.cmapdb import CMapDB +from pdfminer.layout import LAParams + class GenericDisclogScraper(object): __metaclass__ = abc.ABCMeta agencyID = None @@ -35,11 +48,92 @@ """ do the scraping """ return - @abc.abstractmethod - def getDescription(self, content, entry, doc): - """ get description""" - return -
+class GenericPDFDisclogScraper(GenericDisclogScraper):
+    """Scrapes a disclosure log that is published as one PDF file."""
+
+    def doScrape(self):
+        """Extract the PDF text and save it unless that text is already stored.
+
+        The document id is scrape.mkhash() of the extracted text, so an
+        unchanged log maps to the existing document and is not saved again.
+        """
+        foidocsdb = scrape.couch['disclosr-foidocuments']
+        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+            self.getURL(), "foidocuments", self.getAgencyID())
+
+        rsrcmgr = PDFResourceManager(caching=True)
+        outfp = StringIO()
+        device = TextConverter(rsrcmgr, outfp, codec='utf-8',
+            laparams=LAParams())
+        # StringIO is imported as the class itself, so it is called directly;
+        # seeding it with the content leaves the read position at the start.
+        fp = StringIO(content)
+        try:
+            process_pdf(rsrcmgr, device, fp, set(), caching=True,
+                check_extractable=True)
+            # The text is only in outfp once the pages have been processed.
+            description = outfp.getvalue()
+        finally:
+            fp.close()
+            device.close()
+            outfp.close()
+
+        dochash = scrape.mkhash(description)
+        doc = foidocsdb.get(dochash)
+        if doc is None:
+            print "saving " + dochash
+            edate = datetime.now().strftime("%Y-%m-%d")
+            # NOTE(review): 'description' is assumed to be the key that readers
+            # of this database expect for the entry text -- confirm.
+            doc = {'_id': dochash, 'agencyID': self.getAgencyID(),
+                'url': self.getURL(), 'docID': dochash, "date": edate,
+                "title": "Disclosure Log Updated",
+                "description": description}
+            foidocsdb.save(doc)
+        else:
+            print "already saved"
+
+
+class GenericDOCXDisclogScraper(GenericDisclogScraper):
+    """Scrapes a disclosure log that is published as one DOCX file."""
+
+    def doScrape(self):
+        """Extract the DOCX text and save it unless that text is already stored.
+
+        The document id is scrape.mkhash() of the extracted text, so an
+        unchanged log maps to the existing document and is not saved again.
+        """
+        foidocsdb = scrape.couch['disclosr-foidocuments']
+        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+            self.getURL(), "foidocuments", self.getAgencyID())
+
+        # A .docx file is a zip archive: open the fetched content in memory.
+        mydoc = zipfile.ZipFile(StringIO(content))
+        try:
+            xmlcontent = mydoc.read('word/document.xml')
+        finally:
+            mydoc.close()
+        document = etree.fromstring(xmlcontent)
+
+        # UTF-8 encode each paragraph; join paragraphs with a blank line.
+        paratextlist = getdocumenttext(document)
+        description = '\n\n'.join(
+            [paratext.encode("utf-8") for paratext in paratextlist])
+
+        dochash = scrape.mkhash(description)
+        doc = foidocsdb.get(dochash)
+        if doc is None:
+            print "saving " + dochash
+            edate = datetime.now().strftime("%Y-%m-%d")
+            # NOTE(review): 'description' is assumed to be the key that readers
+            # of this database expect for the entry text -- confirm.
+            doc = {'_id': dochash, 'agencyID': self.getAgencyID(),
+                'url': self.getURL(), 'docID': dochash, "date": edate,
+                "title": "Disclosure Log Updated",
+                "description": description}
+            foidocsdb.save(doc)
+        else:
+            print "already saved"
 class GenericRSSDisclogScraper(GenericDisclogScraper): --- a/documents/rss.xml.php +++ b/documents/rss.xml.php @@ -9,9 +9,9 @@ $TestFeed = new RSS2FeedWriter(); //Setting the channel elements //Use wrapper functions for common channelelements -$TestFeed->setTitle('Last Modified - All'); +$TestFeed->setTitle('disclosurelo.gs Newest Entries - All'); $TestFeed->setLink('http://disclosurelo.gs/rss.xml.php'); -$TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer'); +$TestFeed->setDescription('disclosurelo.gs Newest Entries - All Agencies'); $TestFeed->setChannelElement('language', 'en-us'); $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time())); //Retriving informations from database @@ -21,20 +21,21 @@ $idtoname[$row->id] = trim($row->value->name); } $foidocsdb = $server->get_db('disclosr-foidocuments'); -$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows; +$rows = 
$foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00', 50), true)->rows; //print_r($rows); foreach ($rows as $row) { //Create an empty FeedItem $newItem = $TestFeed->createNewItem(); //Add elements to the feed item $newItem->setTitle($row->value->title); - $newItem->setLink("view.php?id=".$row->value->docID); - $newItem->setDate(date("c", strtotime($row->value->date))); + $newItem->setLink("http://disclosurelo.gs/view.php?id=".$row->value->_id); + $newItem->setDate(strtotime($row->value->date)); $newItem->setDescription(displayLogEntry($row,$idtoname)); - $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'true')); + $newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=".$row->value->_id,array('isPermaLink'=>'true')); //Now add the feed item $TestFeed->addItem($newItem); } //OK. Everything is done. Now genarate the feed. $TestFeed->generateFeed(); ?> + --- /dev/null +++ b/documents/runScrapers.sh @@ -1,1 +1,3 @@ +for f in scrapers/*.py; do echo "Processing $f file.."; python $f; done + --- a/documents/template.inc.php +++ b/documents/template.inc.php @@ -21,7 +21,7 @@ - + --- /dev/null +++ b/lib/FeedWriter/COPYING @@ -1,1 +1,675 @@ - + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. 
We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. 
The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. 
Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. 
You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a c