single record scraper
single record scraper

--- a/admin/partialdata/01Jan1970to01Jan1970val0to16000.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search

-

-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	

-There are no results that match your selection. 
+

--- a/admin/partialdata/01Jan1970to01Jan1970valto.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search

-

-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	

-There are no results that match your selection. 
+

--- a/admin/partialdata/07Jun2008to09Jun2008valto.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search

-

-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	

-There are no results that match your selection. 
+

--- a/admin/partialdata/12May2008to14May2008val0to8000.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search

-

-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	

-There are no results that match your selection. 
+

--- a/admin/partialdata/14Aug2007to18Aug2007valto.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search
 
-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	
-There are no results that match your selection. 

--- a/admin/partialdata/14Aug2007to22Aug2007valto.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search
 
-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	
-There are no results that match your selection. 

--- a/admin/partialdata/14Aug2007to29Aug2007valto.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search
 
-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	
-There are no results that match your selection. 

--- a/admin/partialdata/14Nov2007to18Nov2007valto.xls
+++ b/admin/partialdata/14Nov2007to18Nov2007valto.xls
@@ -254,7 +254,7 @@
 ="CN46513"	"Secure LAN facilities for ICON RFT project"	="Department of Agriculture Fisheries and Forestry"	15-Nov-07	="Insurance and retirement services"	06-Nov-07	31-May-08	26290.12	=""	="MBITS - Managed Business IT Solutions"	15-Nov-07 02:02 PM	
 ="CN46515"	"Business Analyst"	="Department of Agriculture Fisheries and Forestry"	15-Nov-07	="Management advisory services"	01-Nov-07	01-Feb-08	14960.00	=""	="Dialog Information Technology"	15-Nov-07 02:02 PM	
 ="CN46517-A1"	"Provision of recruitment services for the 2009 Traineeship Programme."	="Department of Agriculture Fisheries and Forestry"	15-Nov-07	="Human resources services"	31-Oct-07	29-Feb-08	18053.00	=""	="Ross Human Directions Limited"	15-Nov-07 02:03 PM	
-="CN46518"	"To facilitate negotiations and implement Resource Sharing agreements in Australia's Tuna and Billfish fisheries. The consultant activities are as follows:1. Review existing options (research & interpretation)2. Consultation with stakeholders to identify views and positions (facilitation)3. Collate data, analyse data and prepare draft revised agreements between parties (research)4. Consultations to present, discuss and refine agreements between parties (facilitation)5. Further develop/refine agreements and implementation capabilities6. Consultation to finalise agreements and implementation capabilities7. Written report to Government on matters arising from the agreement and implementation.The key output of these activities will be revised recommendations for  resource sharing arrangement in the WTBF and the ETBF based on successful stakeholder consultation."	="Department of Agriculture Fisheries and Forestry"	15-Nov-07	="Management advisory services"	25-Oct-07	25-Oct-08	10843.69	=""	="Ewan Colquhoun, RidgePartners"	15-Nov-07 02:03 PM	
+="CN46518"	"To facilitate negotiations and implement Resource Sharing agreements in Australia's Tuna and Billfish fisheries. "	="Department of Agriculture Fisheries and Forestry"	15-Nov-07	="Management advisory services"	25-Oct-07	25-Oct-08	10843.69	=""	="Ewan Colquhoun, RidgePartners"	15-Nov-07 02:03 PM	
 ="CN46519"	"Sample Analysis - Foreign Certificate Audit"	="Department of Agriculture Fisheries and Forestry"	15-Nov-07	="Management and Business Professionals and Administrative Services"	30-Oct-07	30-Apr-08	50000.00	=""	="National Measurement Institute"	15-Nov-07 02:03 PM	
 ="CN46520"	"Sample Analysis - Foreign Certificate Audit"	="Department of Agriculture Fisheries and Forestry"	15-Nov-07	="Management and Business Professionals and Administrative Services"	30-Oct-07	30-Apr-08	20000.00	=""	="Silliker Microtech Pty Ltd"	15-Nov-07 02:03 PM	
 ="CN46521"	"Assessing the feasibility of stereo video and evaluating monitoring options for the southern bluefin tuna farm sector"	="Department of Agriculture Fisheries and Forestry"	15-Nov-07	="Management advisory services"	01-Jul-07	31-Dec-07	22000.00	=""	="Australian Southern Bluefin Tuna Industry Association"	15-Nov-07 02:03 PM	

--- a/admin/partialdata/15Sep2007to16Sep2007valto.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search
 
-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	
-There are no results that match your selection. 

--- a/admin/partialdata/22Aug2007to30Aug2007valto.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search
 
-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	
-There are no results that match your selection. 

--- a/admin/partialdata/29Aug2007to02Sep2007valto.xls
+++ /dev/null
@@ -1,4 +1,1 @@
-Advanced Search
 
-CN ID	Title	Agency	Publish Date	Category	Contract Start Date	Contract End Date	Value (AUD)	ATM ID	Supplier Name	LastUpdated	
-There are no results that match your selection. 

--- a/admin/partialdata/import.php
+++ b/admin/partialdata/import.php
@@ -1,9 +1,13 @@
 <?php
-include_once ("../lib/common.inc.php");
+include_once ("../../lib/common.inc.php");
 function processFile($fpath, $tablename)
 {
 	global $conn;
+	echo " ============== $fpath  ============== <br>";
+	flush();
 	$row = 1;
+	$success = 0;
+	$dupes = 0;
 	$handle = fopen($fpath, "r");
 	//"t" mode string translates windows line breaks to unix
 	$datamapping0507 = array(
@@ -16,7 +20,8 @@
 		"Title" => "description",
 		"Category" => "category",
 		"ATM ID" => "atmID",
-		"LastUpdated" => "",
+		"Supplier Name" => "supplierName",
+		"LastUpdated" => "amendDate",
 		"" => ""
 	);
 	$headers;
@@ -32,7 +37,7 @@
 		"value",
 		"atmID",
 		"supplierName",
-		"LastUpdated"
+		"amendDate"
 	);
 	if ($tablename == "contractnotice") {
 		$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( ';
@@ -50,7 +55,7 @@
 		}
 		elseif ($row > 3) {
 			if ($num > count($datamapping0507)) {
-				die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fname . print_r($data));
+				die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fpath . "data:" .$num. print_r($data ,true). "mapping:" . count($datamapping0507). print_r($datamapping0507 ,true)); // $fpath is this function's parameter; $fname is not in scope here
 			}
 			$contractNoticeInsert = Array();
 			$contractNoticeInsert[] = $fpath;
@@ -98,6 +103,7 @@
 				$contractNoticeInsertQ->execute($contractNoticeInsert);
 				$errors = $conn->errorInfo();
 				if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
+					$dupes++;
 				}
 				elseif ($errors[1] == 0) {
 					$success++;
@@ -116,6 +122,9 @@
 		$row++;
 	}
 	fclose($handle);
+	echo " $dupes duplicate records<br>";
+		echo " $success records successfully created<br>";
+	flush();
 	return $success;
 }
 $path = './';
@@ -129,6 +138,7 @@
 		while (false !== ($fname = readdir($dhandle))) {
 			if (($fname != '.') && ($fname != '..')) {
 				echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
+				processFile($path . $fname, "contractnotice");
 			}
 		}
 	}
@@ -136,11 +146,9 @@
 else {
 	$success = 0;
 	$fname = $_REQUEST["fname"];
-	echo " ============== $fname  ============== <br>";
-	flush();
+	
 	$success+= processFile($path . $fname, "contractnotice");
-	echo "<br> $success records successfully created";
-	flush();
+
 }
 ?>
 

--- /dev/null
+++ b/admin/partialdata/phpQuery-onefile.php
@@ -1,1 +1,5702 @@
+<?php
+/**
+ * phpQuery is a server-side, chainable, CSS3 selector driven
+ * Document Object Model (DOM) API based on jQuery JavaScript Library.
+ *
+ * @version 0.9.5
+ * @link http://code.google.com/p/phpquery/
+ * @link http://phpquery-library.blogspot.com/
+ * @link http://jquery.com/
+ * @author Tobiasz Cudnik <tobiasz.cudnik/gmail.com>
+ * @license http://www.opensource.org/licenses/mit-license.php MIT License
+ * @package phpQuery
+ */
 
+// class names for instanceof
+// TODO move them as class constants into phpQuery
+define('DOMDOCUMENT', 'DOMDocument');
+define('DOMELEMENT', 'DOMElement');
+define('DOMNODELIST', 'DOMNodeList');
+define('DOMNODE', 'DOMNode');
+
+/**
+ * DOMEvent class.
+ *
+ * Based on
+ * @link http://developer.mozilla.org/En/DOM:event
+ * @author Tobiasz Cudnik <tobiasz.cudnik/gmail.com>
+ * @package phpQuery
+ * @todo implement ArrayAccess ?
+ */
+class DOMEvent {
+	/**
+	 * Returns a boolean indicating whether the event bubbles up through the DOM or not.
+	 *
+	 * @var bool Defaults to true; stopPropagation() sets it to false.
+	 */
+	public $bubbles = true;
+	/**
+	 * Returns a boolean indicating whether the event is cancelable.
+	 *
+	 * @var bool Defaults to true; not consulted by any method of this class.
+	 */
+	public $cancelable = true;
+	/**
+	 * Returns a reference to the currently registered target for the event.
+	 *
+	 * @var mixed Whatever the constructor's $data supplies; presumably a DOM node (confirm with callers).
+	 */
+	public $currentTarget;
+	/**
+	 * Returns detail about the event, depending on the type of event.
+	 *
+	 * @var mixed Not set by this class unless passed in through the constructor.
+	 * @link http://developer.mozilla.org/en/DOM/event.detail
+	 */
+	public $detail;	// ???
+	/**
+	 * Used to indicate which phase of the event flow is currently being evaluated.
+	 *
+	 * NOT IMPLEMENTED
+	 *
+	 * @var mixed Not set by this class unless passed in through the constructor.
+	 * @link http://developer.mozilla.org/en/DOM/event.eventPhase
+	 */
+	public $eventPhase;	// ???
+	/**
+	 * The explicit original target of the event (Mozilla-specific).
+	 *
+	 * NOT IMPLEMENTED
+	 *
+	 * @var mixed Not set by this class unless passed in through the constructor.
+	 */
+	public $explicitOriginalTarget; // moz only
+	/**
+	 * The original target of the event, before any retargetings (Mozilla-specific).
+	 *
+	 * NOT IMPLEMENTED
+	 *
+	 * @var mixed Not set by this class unless passed in through the constructor.
+	 */
+	public $originalTarget;	// moz only
+	/**
+	 * Identifies a secondary target for the event.
+	 *
+	 * @var mixed Whatever the constructor's $data supplies; presumably a DOM node (confirm with callers).
+	 */
+	public $relatedTarget;
+	/**
+	 * Returns a reference to the target to which the event was originally dispatched.
+	 *
+	 * @var mixed Whatever the constructor's $data supplies; presumably a DOM node (confirm with callers).
+	 */
+	public $target;
+	/**
+	 * Returns the time that the event was created.
+	 *
+	 * @var int Filled with time() by the constructor when $data gives no truthy value.
+	 */
+	public $timeStamp;
+	/**
+	 * Returns the name of the event (case-insensitive).
+	 */
+	public $type;
+	public $runDefault = true;	// cleared by preventDefault()
+	public $data = null;
+	public function __construct($data) {	// $data: iterable of property name => value
+		foreach($data as $k => $v) {
+			$this->$k = $v;	// copied verbatim, including keys that are not declared properties
+		}
+		if (! $this->timeStamp)
+			$this->timeStamp = time();
+	}
+	/**
+	 * Cancels the event (if it is cancelable).
+	 * Implementation note: sets $runDefault to false without checking $cancelable.
+	 */
+	public function preventDefault() {
+		$this->runDefault = false;
+	}
+	/**
+	 * Stops the propagation of events further along in the DOM.
+	 * Implementation note: done by setting $bubbles to false.
+	 */
+	public function stopPropagation() {
+		$this->bubbles = false;
+	}
+}
+
+
+/**
+ * DOMDocumentWrapper class simplifies work with DOMDocument.
+ *
+ * Know bug:
+ * - in XHTML fragments, <br /> changes to <br clear="none" />
+ *
+ * @todo check XML catalogs compatibility
+ * @author Tobiasz Cudnik <tobiasz.cudnik/gmail.com>
+ * @package phpQuery
+ */
+class DOMDocumentWrapper {
+	/**
+	 * @var DOMDocument
+	 */
+	public $document;
+	public $id;
+	/**
+	 * @todo Rewrite as method and quess if null.
+	 * @var unknown_type
+	 */
+	public $contentType = '';
+	public $xpath;
+	public $uuid = 0;
+	public $data = array();
+	public $dataNodes = array();
+	public $events = array();
+	public $eventsNodes = array();
+	public $eventsGlobal = array();
+	/**
+	 * @TODO iframes support http://code.google.com/p/phpquery/issues/detail?id=28
+	 * @var unknown_type
+	 */
+	public $frames = array();
+	/**
+	 * Document root, by default equals to document itself.
+	 * Used by documentFragments.
+	 *
+	 * @var DOMNode
+	 */
+	public $root;
+	public $isDocumentFragment;
+	public $isXML = false;
+	public $isXHTML = false;
+	public $isHTML = false;
+	public $charset;
+	public function __construct($markup = null, $contentType = null, $newDocumentID = null) {
+		if (isset($markup))	// markup is optional; without it only the id below is assigned
+			$this->load($markup, $contentType, $newDocumentID);	// the boolean result of load() is not checked here
+		$this->id = $newDocumentID
+			? $newDocumentID
+			: md5(microtime());	// no id supplied: derive one from the current time
+	}
+	public function load($markup, $contentType = null, $newDocumentID = null) {	// accepts a DOMDocument or a markup string; returns true on success, false otherwise
+//		phpQuery::$documents[$id] = $this;
+		$this->contentType = strtolower((string) $contentType);	// null becomes '', which loadMarkup() treats as "autodetect"
+		if ($markup instanceof DOMDOCUMENT) {	// already-parsed document: adopt it as-is
+			$this->document = $markup;
+			$this->root = $this->document;
+			$this->charset = $this->document->encoding;
+			$loaded = true;	// was never set on this path, so load() returned false and no xpath was built; TODO isDocumentFragment
+		} else {
+			$loaded = $this->loadMarkup($markup);	// string markup: parse it as HTML or XML
+		}
+		if ($loaded) {
+//			$this->document->formatOutput = true;
+			$this->document->preserveWhiteSpace = true;
+			$this->xpath = new DOMXPath($this->document);
+			$this->afterMarkupLoad();
+			return true;
+			// remember last loaded document
+//			return phpQuery::selectDocument($id);
+		}
+		return false;
+	}
+	protected function afterMarkupLoad() {	// hook run by load() right after $this->xpath is created
+		if ($this->isXHTML) {
+			$this->xpath->registerNamespace("html", "http://www.w3.org/1999/xhtml");	// makes the html: prefix usable in XPath queries
+		}
+	}
+	protected function loadMarkup($markup) {	// picks the HTML or XML loader for $markup; returns that loader's result (false if none ran)
+		$loaded = false;
+		if ($this->contentType) {
+			self::debug("Load markup for content type {$this->contentType}");
+			// content determined by contentType
+			list($contentType, $charset) = $this->contentTypeToArray($this->contentType);
+			switch($contentType) {
+				case 'text/html':
+					phpQuery::debug("Loading HTML, content type '{$this->contentType}'");
+					$loaded = $this->loadMarkupHTML($markup, $charset);
+				break;
+				case 'text/xml':
+				case 'application/xhtml+xml':
+					phpQuery::debug("Loading XML, content type '{$this->contentType}'");
+					$loaded = $this->loadMarkupXML($markup, $charset);
+				break;
+				default:
+					// for feeds or anything that sometimes doesn't use text/xml
+					if (strpos($this->contentType, 'xml') !== false) {	// haystack first; the swapped arguments never matched types such as application/rss+xml
+						phpQuery::debug("Loading XML, content type '{$this->contentType}'");
+						$loaded = $this->loadMarkupXML($markup, $charset);
+					} else
+						phpQuery::debug("Could not determine document type from content type '{$this->contentType}'");
+			}
+		} else {
+			// content type autodetection
+			if ($this->isXML($markup)) {
+				phpQuery::debug("Loading XML, isXML() == true");
+				$loaded = $this->loadMarkupXML($markup);
+				if (! $loaded && $this->isXHTML) {
+					phpQuery::debug('Loading as XML failed, trying to load as HTML, isXHTML == true');
+					$loaded = $this->loadMarkupHTML($markup);
+				}
+			} else {
+				phpQuery::debug("Loading HTML, isXML() == false");
+				$loaded = $this->loadMarkupHTML($markup);
+			}
+		}
+		return $loaded;
+	}
+	protected function loadMarkupReset() {	// called at the start of loadMarkupHTML()/loadMarkupXML()
+		$this->isXML = $this->isXHTML = $this->isHTML = false;	// clear all three document-kind flags
+	}
+	protected function documentCreate($charset, $version = '1.0') {	// replaces $this->document with a new, empty DOMDocument
+		if (! $version)	// a falsy version (null, '') falls back to '1.0'
+			$version = '1.0';
+		$this->document = new DOMDocument($version, $charset);
+		$this->charset = $this->document->encoding;	// read back from the new document rather than copied from $charset
+//		$this->document->encoding = $charset;
+		$this->document->formatOutput = true;
+		$this->document->preserveWhiteSpace = true;
+	}
+	protected function loadMarkupHTML($markup, $requestedCharset = null) {	// parses $markup as HTML (full document or fragment); returns the loader's result
+		if (phpQuery::$debug)
+			phpQuery::debug('Full markup load (HTML): '.substr($markup, 0, 250));
+		$this->loadMarkupReset();
+		$this->isHTML = true;
+		if (!isset($this->isDocumentFragment))	// only autodetect when nothing has set the flag yet
+			$this->isDocumentFragment = self::isDocumentFragmentHTML($markup);
+		$charset = null;
+		$documentCharset = $this->charsetFromHTML($markup);	// charset declared by a Content-Type meta tag, if any
+		$addDocumentCharset = false;
+		if ($documentCharset) {
+			$charset = $documentCharset;
+			$markup = $this->charsetFixHTML($markup);	// move the existing meta tag right behind <head>
+		} else if ($requestedCharset) {
+			$charset = $requestedCharset;
+		}
+		if (! $charset)
+			$charset = phpQuery::$defaultCharset;
+		// HTTP 1.1 says that the default charset is ISO-8859-1
+		// @see http://www.w3.org/International/O-HTTP-charset
+		if (! $documentCharset) {
+			$documentCharset = 'ISO-8859-1';
+			$addDocumentCharset = true;	// no meta tag in the markup: one is injected further down for full documents
+		}
+		// Should be careful here, still need 'magic encoding detection' since lots of pages have other 'default encoding'
+		// Worse, some pages can have mixed encodings... we'll try not to worry about that
+		$requestedCharset = strtoupper($requestedCharset);	// upper-cased so the !== comparison below ignores case
+		$documentCharset = strtoupper($documentCharset);
+		phpQuery::debug("DOC: $documentCharset REQ: $requestedCharset");
+		if ($requestedCharset && $documentCharset && $requestedCharset !== $documentCharset) {
+			phpQuery::debug("CHARSET CONVERT");
+			// Document Encoding Conversion
+			// http://code.google.com/p/phpquery/issues/detail?id=86
+			if (function_exists('mb_detect_encoding')) {
+				$possibleCharsets = array($documentCharset, $requestedCharset, 'AUTO');
+				$docEncoding = mb_detect_encoding($markup, implode(', ', $possibleCharsets));
+				if (! $docEncoding)
+					$docEncoding = $documentCharset; // ok trust the document
+				phpQuery::debug("DETECTED '$docEncoding'");
+				// Detected does not match what document says...
+				if ($docEncoding !== $documentCharset) {
+					// Tricky..
+				}
+				if ($docEncoding !== $requestedCharset) {
+					phpQuery::debug("CONVERT $docEncoding => $requestedCharset");
+					$markup = mb_convert_encoding($markup, $requestedCharset, $docEncoding);
+					$markup = $this->charsetAppendToHTML($markup, $requestedCharset);	// rewrite the meta tag to match the converted bytes
+					$charset = $requestedCharset;
+				}
+			} else {
+				phpQuery::debug("TODO: charset conversion without mbstring...");
+			}
+		}
+		$return = false;
+		if ($this->isDocumentFragment) {
+			phpQuery::debug("Full markup load (HTML), DocumentFragment detected, using charset '$charset'");
+			$return = $this->documentFragmentLoadMarkup($this, $charset, $markup);
+		} else {
+			if ($addDocumentCharset) {
+				phpQuery::debug("Full markup load (HTML), appending charset: '$charset'");
+				$markup = $this->charsetAppendToHTML($markup, $charset);
+			}
+			phpQuery::debug("Full markup load (HTML), documentCreate('$charset')");
+			$this->documentCreate($charset);
+			$return = phpQuery::$debug === 2	// parser warnings are only let through at debug level 2
+				? $this->document->loadHTML($markup)
+				: @$this->document->loadHTML($markup);
+			if ($return)
+				$this->root = $this->document;
+		}
+		if ($return && ! $this->contentType)	// remember the detected type when the caller gave none
+			$this->contentType = 'text/html';
+		return $return;
+	}
+	protected function loadMarkupXML($markup, $requestedCharset = null) {	// parses $markup as XML/XHTML; returns the truthy load result or throws Exception
+		if (phpQuery::$debug)
+			phpQuery::debug('Full markup load (XML): '.substr($markup, 0, 250));
+		$this->loadMarkupReset();
+		$this->isXML = true;
+		// check agains XHTML in contentType or markup
+		$isContentTypeXHTML = $this->isXHTML();
+		$isMarkupXHTML = $this->isXHTML($markup);
+		if ($isContentTypeXHTML || $isMarkupXHTML) {
+			self::debug('Full markup load (XML), XHTML detected');
+			$this->isXHTML = true;
+		}
+		// determine document fragment
+		if (! isset($this->isDocumentFragment))
+			$this->isDocumentFragment = $this->isXHTML
+				? self::isDocumentFragmentXHTML($markup)
+				: self::isDocumentFragmentXML($markup);
+		// this charset will be used
+		$charset = null;
+		// charset from XML declaration @var string
+		$documentCharset = $this->charsetFromXML($markup);
+		if (! $documentCharset) {
+			if ($this->isXHTML) {
+				// this is XHTML, try to get charset from content-type meta header
+				$documentCharset = $this->charsetFromHTML($markup);
+				if ($documentCharset) {
+					phpQuery::debug("Full markup load (XML), appending XHTML charset '$documentCharset'");
+					$this->charsetAppendToXML($markup, $documentCharset);	// NOTE(review): return value is discarded, so $markup is not changed here - confirm intent
+					$charset = $documentCharset;
+				}
+			}
+			if (! $documentCharset) {
+				// if still no document charset...
+				$charset = $requestedCharset;
+			}
+		} else if ($requestedCharset) {
+			$charset = $requestedCharset;
+		}
+		if (! $charset) {
+			$charset = phpQuery::$defaultCharset;
+		}
+		if ($requestedCharset && $documentCharset && $requestedCharset != $documentCharset) {
+			// TODO place for charset conversion
+//			$charset = $requestedCharset;
+		}
+		$return = false;
+		if ($this->isDocumentFragment) {
+			phpQuery::debug("Full markup load (XML), DocumentFragment detected, using charset '$charset'");
+			$return = $this->documentFragmentLoadMarkup($this, $charset, $markup);
+		} else {
+			// FIXME ???
+			if ($isContentTypeXHTML && ! $isMarkupXHTML)
+			if (! $documentCharset) {
+				phpQuery::debug("Full markup load (XML), appending charset '$charset'");
+				$markup = $this->charsetAppendToXML($markup, $charset);
+			}
+			// see http://pl2.php.net/manual/en/book.dom.php#78929
+			// LIBXML_DTDLOAD (>= PHP 5.1)
+			// does XML ctalogues works with LIBXML_NONET
+	//		$this->document->resolveExternals = true;
+			// TODO test LIBXML_COMPACT for performance improvement
+			// create document
+			$this->documentCreate($charset);
+			if (version_compare(PHP_VERSION, '5.1', '<')) {	// version strings need version_compare(); "<" against the float 5.1 is not a version comparison
+				$this->document->resolveExternals = true;
+				$return = phpQuery::$debug === 2
+					? $this->document->loadXML($markup)
+					: @$this->document->loadXML($markup);
+			} else {
+				/** @link http://pl2.php.net/manual/en/libxml.constants.php */
+				$libxmlStatic = phpQuery::$debug === 2	// libxml warnings/errors are only reported at debug level 2
+					? LIBXML_DTDLOAD|LIBXML_DTDATTR|LIBXML_NONET
+					: LIBXML_DTDLOAD|LIBXML_DTDATTR|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOERROR;
+				$return = $this->document->loadXML($markup, $libxmlStatic);
+// 				if (! $return)
+// 					$return = $this->document->loadHTML($markup);
+			}
+			if ($return)
+				$this->root = $this->document;
+		}
+		if ($return) {
+			if (! $this->contentType) {	// remember the detected type when the caller gave none
+				if ($this->isXHTML)
+					$this->contentType = 'application/xhtml+xml';
+				else
+					$this->contentType = 'text/xml';
+			}
+			return $return;
+		} else {
+			throw new Exception("Error loading XML markup");
+		}
+	}
+	protected function isXHTML($markup = null) {
+		if (! isset($markup)) {	// no markup given: decide from the stored content type instead
+			return strpos($this->contentType, 'xhtml') !== false;
+		}
+		// XXX ok ?
+		return strpos($markup, "<!DOCTYPE html") !== false;	// NOTE(review): case-sensitive, and true for any "<!DOCTYPE html..." doctype, not only XHTML ones
+//		return stripos($doctype, 'xhtml') !== false;
+//		$doctype = isset($dom->doctype) && is_object($dom->doctype)
+//			? $dom->doctype->publicId
+//			: self::$defaultDoctype;
+	}
+	protected function isXML($markup) {	// true when an XML declaration opener occurs within the first 100 bytes of $markup
+//		return strpos($markup, '<?xml') !== false && stripos($markup, 'xhtml') === false;
+		return strpos(substr($markup, 0, 100), '<'.'?xml') !== false;
+	}
+	protected function contentTypeToArray($contentType) {	// "type;charset=x" => array("type", "x"); index 1 is null when there is no ';' part
+		$matches = explode(';', trim(strtolower($contentType)));
+		if (isset($matches[1])) {
+			$matches[1] = explode('=', $matches[1]);
+			// strip 'charset='
+			$matches[1] = isset($matches[1][1]) && trim($matches[1][1])
+				? $matches[1][1]	// value after '=' (returned untrimmed)
+				: $matches[1][0];	// no usable '=value': keep the raw text after ';'
+		} else
+			$matches[1] = null;
+		return $matches;
+	}
+	/**
+	 * Reads content type and charset from the first http-equiv Content-Type meta tag.
+	 * @param $markup
+	 * @return array contentType, charset
+	 */
+	protected function contentTypeFromHTML($markup) {
+		$matches = array();
+		// find meta tag
+		preg_match('@<meta[^>]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i',
+			$markup, $matches
+		);
+		if (! isset($matches[0]))	// no such meta tag
+			return array(null, null);
+		// get attr 'content'
+		preg_match('@content\\s*=\\s*(["|\'])(.+?)\\1@', $matches[0], $matches);
+		if (! isset($matches[0]))	// meta tag found but it has no content="..." attribute
+			return array(null, null);
+		return $this->contentTypeToArray($matches[2]);
+	}
+	protected function charsetFromHTML($markup) {	// charset part of the Content-Type meta tag, or null
+		$contentType = $this->contentTypeFromHTML($markup);
+		return $contentType[1];	// index 1 is the charset slot of the (contentType, charset) pair
+	}
+	protected function charsetFromXML($markup) {	// lower-cased encoding from the XML declaration, or null when none is declared
+		$matches = array();	// initialised; the bare "$matches;" statement read an undefined variable
+		// find declaration
+		preg_match('@<'.'?xml[^>]+encoding\\s*=\\s*(["|\'])(.*?)\\1@i',
+			$markup, $matches
+		);
+		return isset($matches[2])
+			? strtolower($matches[2])
+			: null;
+	}
+	/**
+	 * Repositions meta[type=charset] at the start of head. Bypasses DOMDocument bug.
+	 * Returns the markup unchanged when it contains no Content-Type meta tag.
+	 * @link http://code.google.com/p/phpquery/issues/detail?id=80
+	 * @param $markup
+	 */
+	protected function charsetFixHTML($markup) {
+		$matches = array();
+		// find meta tag
+		preg_match('@\s*<meta[^>]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i',
+			$markup, $matches, PREG_OFFSET_CAPTURE
+		);
+		if (! isset($matches[0]))
+			return $markup;	// a bare "return;" handed null back and wiped the caller's markup
+		$metaContentType = $matches[0][0];
+		$markup = substr($markup, 0, $matches[0][1])	// cut the meta tag out of its current position
+			.substr($markup, $matches[0][1]+strlen($metaContentType));
+		$headStart = stripos($markup, '<head>');	// NOTE(review): false for a missing or attributed <head ...>, which puts the meta tag at offset 6
+		$markup = substr($markup, 0, $headStart+6).$metaContentType	// 6 == strlen('<head>')
+			.substr($markup, $headStart+6);
+		return $markup;
+	}
+	protected function charsetAppendToHTML($html, $charset, $xhtml = false) {	// returns $html with exactly one Content-Type meta tag declaring $charset
+		// remove existing meta[type=content-type]
+		$html = preg_replace('@\s*<meta[^>]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i', '', $html);
+		$meta = '<meta http-equiv="Content-Type" content="text/html;charset='
+			.$charset.'" '
+			.($xhtml ? '/' : '')	// self-closing form for XHTML
+			.'>';
+		if (strpos($html, '<head') === false) {
+			if (strpos($html, '<html') === false) {	// was the typo $hltml, which made this test always true
+				return $meta.$html;	// neither <html> nor <head>: prepend the tag
+			} else {
+				return preg_replace(	// <html> without <head>: create a <head> holding the tag
+					'@<html(.*?)(?(?<!\?)>)@s',
+					"<html\\1><head>{$meta}</head>",
+					$html
+				);
+			}
+		} else {
+			return preg_replace(	// <head> present: insert the tag right after its opening tag
+				'@<head(.*?)(?(?<!\?)>)@s',
+				'<head\\1>'.$meta,
+				$html
+			);
+		}
+	}
+	protected function charsetAppendToXML($markup, $charset) {	// returns a new string; $markup itself is not modified
+		$declaration = '<'.'?xml version="1.0" encoding="'.$charset.'"?'.'>';
+		return $declaration.$markup;	// prepended unconditionally, without checking for an existing declaration
+	}
+	public static function isDocumentFragmentHTML($markup) {	// fragment == contains neither "<html" nor "<!doctype" (case-insensitive)
+		return stripos($markup, '<html') === false && stripos($markup, '<!doctype') === false;
+	}
+	public static function isDocumentFragmentXML($markup) {	// fragment == no XML declaration opener anywhere in $markup (case-insensitive)
+		return stripos($markup, '<'.'?xml') === false;
+	}
+	public static function isDocumentFragmentXHTML($markup) {	// XHTML uses the same test as HTML
+		return self::isDocumentFragmentHTML($markup);
+	}
+	public function importAttr($value) {	// unimplemented stub: ignores $value and returns null
+		// TODO
+	}
+	/**
+	 *
+	 * @param $source
+	 * @param $target
+	 * @param $sourceCharset
+	 * @return array Array of imported nodes.
+	 */
+	public function import($source, $sourceCharset = null) {
+		// TODO charset conversions
+		$return = array();
+		if ($source instanceof DOMNODE && !($source instanceof DOMNODELIST))
+			$source = array($source);
+//		if (is_array($source)) {
+//			foreach($source as $node) {
+//				if (is_string($node)) {
+//					// string markup
+//					$fake = $this->documentFragmentCreate($node, $sourceCharset);
+//					if ($fake === false)
+//						throw new Exception("Error loading documentFragment markup");
+//					else
+//						$return = array_merge($return, 
+//							$this->import($fake->root->childNodes)
+//						);
+//				} else {
+//					$return[] = $this->document->importNode($node, true);
+//				}
+//			}
+//			return $return;
+//		} else {
+//			// string markup
+//			$fake = $this->documentFragmentCreate($source, $sourceCharset);
+//			if ($fake === false)
+//				throw new Exception("Error loading documentFragment markup");
+//			else
+//				return $this->import($fake->root->childNodes);
+//		}
+		if (is_array($source) || $source instanceof DOMNODELIST) {
+			// dom nodes
+			self::debug('Importing nodes to document');
+			foreach($source as $node)
+				$return[] = $this->document-