add innovation scrAper
add innovation scrAper


Former-commit-id: 80558a9217d1bcad0766200d0e1d42aa022ff501

<?php <?php
   
/** /**
* Database class.
*/ */
class SetteeDatabase { class SetteeDatabase {
   
/**
 * Base URL of the CouchDB REST API, as passed to the constructor.
 */
private $conn_url;

/**
 * HTTP REST client instance through which every request of this class is sent.
 */
protected $rest_client;

/**
 * Name of the database; used as the first path segment of document and view URIs.
 */
private $dbname;
/**
 * Bind this instance to one database of a CouchDB server.
 *
 * @param $conn_url
 *    Base URL of the CouchDB REST API; also handed to SetteeRestClient::get_instance().
 * @param $dbname
 *    Name of the database that document and view URIs are built from.
 */
function __construct($conn_url, $dbname) {
    $this->dbname = $dbname;
    $this->conn_url = $conn_url;
    $this->rest_client = SetteeRestClient::get_instance($conn_url);
}
   
   
/**
 * Ask CouchDB for a generated UUID.
 *
 * @return
 *    The first UUID string of the "_uuids" response.
 */
function gen_uuid() {
    $response = $this->rest_client->http_get('_uuids');
    $uuids = $response['decoded']->uuids;

    // No emptiness check on purpose: the list is expected to always hold at least one entry.
    return $uuids[0];
}
   
/**
 * Create or update a document in the database.
 *
 * @param $document
 *    PHP object, PHP associative array, or JSON string representing the document to be saved.
 *    JSON strings are decoded and arrays are cast to objects before processing.
 *
 *    <p>If $document has an "_id" property set, it is used as the document's unique id (even for a "create"
 *    operation). If both "_id" and "_rev" are missing, a UUID is requested from CouchDB.
 *
 *    <p>If $document has a "_rev" property (revision), the document will be updated rather than created.
 *    You have to provide "_rev" if you want to update an existing document, otherwise the operation is assumed
 *    to be a creation and CouchDB will report a duplicate document. Providing "_rev" without "_id" is invalid input.
 *
 * @param $allowRevAutoDetection
 *    Default: false. When true and the document carries an "_id", the current revision is looked up with
 *    get_rev() and, when one is found, written into "_rev" before saving (this replaces a "_rev" supplied by the
 *    caller). It is off by default because it costs an extra HTTP HEAD request.
 *
 * @return
 *    the document object with "_id" and "_rev" set from the server response.
 *
 * @throws SetteeWrongInputException
 *    if $document is not an object, an array or a JSON string decoding to one of those, or if it has a "_rev"
 *    but no "_id". Errors raised by the REST client during the PUT request are not caught here.
 */
function save($document, $allowRevAutoDetection = false) {
    if (is_string($document)) {
        $document = json_decode($document);
    }

    // Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter)
    if (is_array($document)) {
        $document = (object) $document;
    }

    // json_decode() returns null for malformed JSON and a scalar for JSON scalars; neither can carry
    // "_id"/"_rev", so reject them here instead of failing later on a property assignment.
    if (!is_object($document)) {
        throw new SetteeWrongInputException("Error: document must be a PHP object, an associative array or a JSON string representing an object");
    }

    $has_id = !empty($document->_id);
    $has_rev = !empty($document->_rev);

    if (!$has_id && $has_rev) {
        throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id");
    }

    if (!$has_id) {
        $id = $this->gen_uuid();
    }
    else {
        $id = $document->_id;

        if ($allowRevAutoDetection) {
            $rev = null;
            try {
                $rev = $this->get_rev($id);
            } catch (SetteeRestClientException $e) {
                // auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error
            }
            if (!empty($rev)) {
                $document->_rev = $rev;
            }
        }
    }

    $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
    // NOTE(review): JSON_NUMERIC_CHECK encodes numeric-looking strings as numbers, so string fields such as
    // "0123" do not round-trip as strings. Kept because existing callers may rely on it.
    $document_json = json_encode($document, JSON_NUMERIC_CHECK);
    $ret = $this->rest_client->http_put($full_uri, $document_json);

    $document->_id = $ret['decoded']->id;
    $document->_rev = $ret['decoded']->rev;

    return $document;
}
   
/**
 * Attach inline content to a document object (the document is modified in place, nothing is sent to the server).
 *
 * @param $doc
 *    Document object that receives the attachment under its "_attachments" property. If that property is
 *    empty or not an object, it is replaced by a fresh object first.
 * @param $name
 *    Name of the attachment; used as the property name inside "_attachments".
 * @param $content
 *    Content of the attachment in a string-buffer format. It is base64-encoded by this function, so the caller
 *    must pass it unencoded.
 * @param $mime_type
 *    Optional. When empty, the REST client's content_mime_type() is asked to detect it from $content.
 * @return void
 */
public function add_attachment($doc, $name, $content, $mime_type = null) {
    $has_container = !empty($doc->_attachments) && is_object($doc->_attachments);
    if (!$has_container) {
        $doc->_attachments = new stdClass();
    }

    $attachment = new stdClass();
    $attachment->content_type = empty($mime_type)
        ? $this->rest_client->content_mime_type($content)
        : $mime_type;
    $attachment->data = base64_encode($content);

    $doc->_attachments->$name = $attachment;
}
   
/**
 * Attach the contents of a file to a document object.
 *
 * @param $doc
 *    Document object that receives the attachment (see add_attachment()).
 * @param $name
 *    Name of the attachment.
 * @param $file
 *    Full path to a file (e.g. as returned by PHP's realpath function).
 * @param $mime_type
 *    Optional. Will be auto-detected if not provided
 * @return void
 *
 * @throws SetteeWrongInputException
 *    if the file can not be read.
 */
public function add_attachment_file($doc, $name, $file, $mime_type = null) {
    $content = file_get_contents($file);

    // file_get_contents() returns false on failure; encoding that would silently attach empty content.
    if ($content === false) {
        throw new SetteeWrongInputException("Error: could not read attachment file: " . $file);
    }

    $this->add_attachment($doc, $name, $content, $mime_type);
}
   
/**
 * Retrieve a document from CouchDB.
 *
 * @param $id
 *    Unique ID (usually: UUID) of the document to be retrieved. If the encoded id contains "%3Frev%3D" it is
 *    turned back into "?rev=" - presumably so that a specific revision can be requested by appending
 *    "?rev=..." to the id.
 * @return
 *    the decoded response of the GET request (the database document in PHP object format).
 *
 * @throws SetteeWrongInputException
 *    if $id is empty.
 */
function get($id) {
    if (empty($id)) {
        throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
    }

    $encoded_uri = $this->dbname . "/" . $this->safe_urlencode($id);
    $request_uri = str_replace("%3Frev%3D", "?rev=", $encoded_uri);

    $response = $this->rest_client->http_get($request_uri);
    return $response['decoded'];
}
   
/**
 * Get the latest revision of the document with id $id, using an HTTP HEAD request.
 *
 * @param $id
 *    Unique ID (usually: UUID) of the document to be queried.
 * @return
 *    the value of the "Etag" response header with all double quotes removed.
 *
 * @throws SetteeWrongInputException
 *    if $id is empty.
 * @throws SetteeRestClientException
 *    if the response carries no (or an empty) "Etag" header.
 */
function get_rev($id) {
    if (empty($id)) {
        throw new SetteeWrongInputException("Error: Can't query a document without a uuid.");
    }

    $uri = $this->dbname . "/" . $this->safe_urlencode($id);
    $response_headers = $this->rest_client->http_head($uri);

    if (!empty($response_headers['Etag'])) {
        return str_replace('"', '', $response_headers['Etag']);
    }

    throw new SetteeRestClientException("Error: could not retrieve revision. Server unexpectedly returned empty Etag");
}
/**
 * Delete a document.
 *
 * @param $document
 *    a PHP object, PHP associative array or JSON string representation of the document; it must have
 *    non-empty "_id" and "_rev" fields.
 *
 * @return void
 *
 * @throws SetteeWrongInputException
 *    if $document can not be turned into an object or lacks "_id" or "_rev".
 */
function delete($document) {
    if (is_string($document)) {
        $document = json_decode($document);
    }
    elseif (is_array($document)) {
        // accept arrays the same way save() does
        $document = (object) $document;
    }

    // Without this guard an empty "_id" would build the URI "<dbname>/?rev=...", i.e. the DELETE request
    // would be addressed to the database path itself rather than to a document.
    if (!is_object($document) || empty($document->_id) || empty($document->_rev)) {
        throw new SetteeWrongInputException("Error: Can't delete a document without both _id and _rev.");
    }

    $full_uri = $this->dbname . "/" . $this->safe_urlencode($document->_id) . "?rev=" . $document->_rev;
    $this->rest_client->http_delete($full_uri);
}
   
/*----------------- View-related functions --------------*/ /*----------------- View-related functions --------------*/
   
/**
 * Create a new view or update an existing one.
 *
 * The design document sent to save() contains only the one view given here.
 *
 * @param $design_doc
 *    Name of the design document; url-encoded and prefixed with "_design/" to form the document id.
 * @param $view_name
 *    Name of the view; url-encoded before being used as the key under "views".
 * @param $map_src
 *    Source code of the map function in Javascript
 * @param $reduce_src
 *    Source code of the reduce function in Javascript (optional)
 * @return
 *    the design document object as returned by save(), with "_id" and "_rev" set.
 */
function save_view($design_doc, $view_name, $map_src, $reduce_src = null) {
    $view = new stdClass();
    $view->map = $map_src;
    if (!empty($reduce_src)) {
        $view->reduce = $reduce_src;
    }

    $view_name = urlencode($view_name);

    // Build the nested objects explicitly: assigning through an undefined property chain
    // ($obj->views->$name->map) is an Error on PHP 8 and a warning on earlier versions.
    $obj = new stdClass();
    $obj->_id = "_design/" . urlencode($design_doc);
    $obj->views = new stdClass();
    $obj->views->$view_name = $view;

    // allow safe updates (even if slightly slower due to extra: rev-detection check).
    return $this->save($obj, true);
}
   
/**
 * Query a view of a design document.
 *
 * @param $design_doc
 *    Name of the design document (url-encoded and prefixed with "_design/").
 * @param $view_name
 *    Name of the view (url-encoded). Encoded "?group=" / "?limit=" sequences inside the final URI are restored,
 *    presumably so that such a query string can be appended to the view name - verify against callers.
 * @param $key
 *    key parameter to a view. Can be a single string or an array (for a range). If passed an array, function
 *    assumes that first element is startkey, second: endkey. Values are wrapped in double quotes as-is.
 *    Empty keys and keys of any other type are ignored.
 * @param $descending
 *    return results in descending order. Please don't forget that if you are using a startkey/endkey, when you
 *    change order you also need to swap startkey and endkey values!
 * @param $limit
 *    Maximum number of rows to request; no limit parameter is sent when this is false/empty.
 * @param $reduce
 *    null (default): send no "reduce" parameter. Any other value sends "reduce=true" when truthy and
 *    "reduce=false" otherwise.
 *
 * @return
 *    the decoded response of the GET request.
 */
function get_view($design_doc, $view_name, $key = null, $descending = false, $limit = false, $reduce = null) {
    $id = "_design/" . urlencode($design_doc) . "/_view/" . urlencode($view_name);

    $params = array();
    if (!empty($key)) {
        if (is_string($key)) {
            $params[] = 'key="' . $key . '"';
        }
        elseif (is_array($key)) {
            list($startkey, $endkey) = $key;
            $params[] = 'startkey="' . $startkey . '"';
            $params[] = 'endkey="' . $endkey . '"';
        }
    }

    // These options are independent of the key, so they are applied whether or not a key was given.
    if ($descending) {
        $params[] = "descending=true";
    }
    // Strict comparison: a loose "!= null" treats false like null, which made "reduce=false" unreachable.
    if ($reduce !== null) {
        $params[] = $reduce ? "reduce=true" : "reduce=false";
    }
    if ($limit) {
        $params[] = "limit=" . $limit;
    }

    // With nothing to send, keep passing an empty array exactly as before.
    $data = empty($params) ? array() : implode("&", $params);

    $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);

    // "%253F" / "%253D" look like a doubly encoded "?" / "=" (urlencode() above, then safe_urlencode());
    // restore them so a query string embedded in the view name reaches the server.
    $full_uri = str_replace("%253Fgroup%253D", "?group=", $full_uri);
    $full_uri = str_replace("%253Flimit%253D", "?limit=", $full_uri);

    $ret = $this->rest_client->http_get($full_uri, $data);
    return $ret['decoded'];
}
   
/**