<?php
/**
 * Pars-pipe
 * Is a useful class to fetch and manipulate data from any web-page.
 * 
 * @version 0.0.1
 * @link http://code.google.com/p/pars-pipe/
 * @author Aram Alipoor <aram.alipoor/gmail.com>
 * @license http://www.opensource.org/licenses/mit-license.php MIT License
 * @package parsPipeCore
 */

require_once(dirname(__FILE__).'/php.query.php');

// Return-type flags, used as the $ret_type argument of rule.
// In single-item mode rule::apply() returns the first item untouched for HTML;
// for any other value it strips the tags and trims the surrounding whitespace.
define('HTML', 201);
define('PLAIN_TEXT', 202);
// Loop-type flags, used as the $loop_type argument of rule.
// MULTI_ITEM works on every item the prototype yields; any other value
// (SINGLE_ITEM) makes rule::apply() use only the first item.
define('MULTI_ITEM', 301);
define('SINGLE_ITEM', 302);

/**
 * pipe Class - the main class: it keeps a queue of rules and pushes a piece
 * of data through them, one rule after another.
 *
 * @package parsPipeCore
 */
class pipe
{
	/**
	 * Holds the rules queue which must be processed against the data.
	 *
	 * @var array
	 */
	private $queue = array();

	/**
	 * Append new rule(s) to the rules queue.
	 *
	 * @param mixed $rules A single rule, or an array of rules, to push onto
	 *                     the queue. Array order is kept.
	 */
	public function append($rules)
	{
		if(is_array($rules))
		{
			foreach($rules as $rule)
			{
				$this->queue[] = $rule;
			}
		}
		else
		{
			$this->queue[] = $rules;
		}
	}
	
	/**
	 * Parse the data based on the rules queue.
	 *
	 * Every queued rule is applied to the result of the previous one. When
	 * the intermediate result is an array, the next rule is applied to each
	 * of its elements and the outcomes are collected into a new list.
	 *
	 * The parameter is still declared by reference so existing callers keep
	 * working, but the caller's variable is never modified.
	 *
	 * @param string $data The raw data you want to apply the rules on.
	 * @return mixed The fetched data; $data itself when the queue is empty.
	 */
	public function parse(&$data)
	{
		// Plain copy on purpose: a reference here would overwrite the
		// caller's variable with intermediate results.
		$ret = $data;
		
		foreach($this->queue as $rule)
		{
			if(!is_array($ret))
			{
				$ret = $rule->apply($ret);
			}
			else
			{
				$tmp = array();
				
				foreach($ret as $item)
				{
					$tmp[] = $rule->apply($item);
				}
				
				// Assign by value: if $ret were bound to $tmp, resetting
				// $tmp for the next rule would also empty $ret before it
				// is iterated.
				$ret = $tmp;
			}
		}
		
		return $ret;
	}
}

/**
 * rule Class is a base class for any kind of rule,
 * there are already 3 kinds of prototypes: slice, regexp, css.
 *
 * A rule wraps a prototype (any object with an apply() method that returns
 * a list of items) and decides what to do with the items it yields.
 *
 * @package parsPipeCore
 */
class rule
{
	/** @var object Prototype whose apply() produces the list of items. */
	public $prototype;
	/** @var int HTML or PLAIN_TEXT; only consulted in single-item mode. */
	public $type;
	/** @var array Map of result key => rule, applied to every item in multi-item mode. */
	public $crawlers;
	/** @var int MULTI_ITEM or SINGLE_ITEM. */
	public $loop_type;

	/**
	 * @param object $prototype Object with an apply() method returning a list of items.
	 * @param int    $ret_type  HTML or PLAIN_TEXT.
	 * @param int    $loop_type MULTI_ITEM or SINGLE_ITEM.
	 * @param array  $crawlers  Map of result key => rule run against every item.
	 */
	public function __construct($prototype, $ret_type = HTML, $loop_type = MULTI_ITEM, $crawlers = array())
	{
		$this->prototype = $prototype;
		$this->type = $ret_type;
		$this->loop_type = $loop_type;
		$this->crawlers = $crawlers;
	}

	/**
	 * PHP 4 style constructor name. Methods named after the class are no
	 * longer treated as constructors since PHP 8, so the real work lives in
	 * __construct(); this stays as an ordinary method for code that calls
	 * it explicitly.
	 *
	 * @deprecated Use the constructor.
	 */
	public function rule($prototype, $ret_type = HTML, $loop_type = MULTI_ITEM, $crawlers = array())
	{
		self::__construct($prototype, $ret_type, $loop_type, $crawlers);
	}
	
	/**
	 * Run the prototype against the data and post-process its items.
	 *
	 * Multi-item mode: returns the items as they are when there are no
	 * crawlers; otherwise returns one associative array per item, holding
	 * the result of every crawler under the crawler's key.
	 *
	 * Any other mode: returns the first item only - untouched for HTML,
	 * with tags stripped and surrounding whitespace trimmed otherwise.
	 * When the prototype yields nothing the result is null (HTML) or an
	 * empty string (text).
	 *
	 * @param string $data The data handed to the prototype.
	 * @return mixed
	 */
	public function apply(&$data)
	{
		$items = $this->prototype->apply($data);
		
		if($this->loop_type === MULTI_ITEM)
		{
			// No crawlers means there is nothing to extract per item, so
			// the items themselves are the result.
			if(empty($this->crawlers))
			{
				return $items;
			}
			
			$ret = array();
			
			foreach($items as $item)
			{
				$arr = array();
				
				foreach($this->crawlers as $key => $rule)
				{
					$arr[$key] = $rule->apply($item);
				}
				
				$ret[] = $arr;
			}
			
			// Nothing was produced: hand back whatever the prototype returned.
			if(count($ret) == 0)
			{
				return $items;
			}
			
			return $ret;
		}
		
		// Guard the lookup so a prototype that found nothing does not
		// trigger an undefined offset.
		$first = isset($items[0]) ? $items[0] : null;
		
		if($this->type === HTML)
		{
			return $first;
		}
		
		if($first === null)
		{
			return '';
		}
		
		return trim(strip_tags($first), "\n\r ");
	}
}

/**
 * slice rule Class - is the simplest rule-type, which will cut
 * data 'from' a string 'to' another string as many times as
 * there is something to slice.
 *
 * For example you want to slice the data between any 
 * '<div class="news-item">' and the following '</div>'
 *
 * Each returned slice starts with the 'from' marker itself and stops
 * right before the 'to' marker.
 * 
 * @package parsPipeCore
 */
class slice
{
	private $from;
	private $to;
	
	/**
	 * @param string $from Marker a slice starts at (kept in the slice).
	 * @param string $to   Marker a slice ends before (not part of the slice).
	 */
	public function __construct($from, $to)
	{
		$this->from = $from;
		$this->to = $to;
	}
	
	/**
	 * PHP 4 style constructor name. Methods named after the class are no
	 * longer treated as constructors since PHP 8, so the real work lives in
	 * __construct(); this stays as an ordinary method for code that calls
	 * it explicitly.
	 *
	 * @deprecated Use the constructor.
	 */
	public function slice($from, $to)
	{
		self::__construct($from, $to);
	}
	
	/**
	 * Cut the data into slices.
	 *
	 * @param string $data The data to slice.
	 * @return array One string per chunk that ends at a 'to' marker and
	 *               contains the 'from' marker; the slice starts at the
	 *               first 'from' found in that chunk.
	 */
	public function apply(&$data)
	{
		// explode() cannot split on an empty delimiter, and without an end
		// marker nothing can be sliced anyway.
		if((string) $this->to === '')
		{
			return array();
		}
		
		$chunks = explode($this->to, $data);
		
		// The last chunk is whatever follows the final 'to' marker, so it
		// was never terminated and is not a slice.
		array_pop($chunks);
		
		$ret = array();
		
		foreach($chunks as $chunk)
		{
			// An empty 'from' marker means "from the start of the chunk".
			$start = ((string) $this->from === '') ? 0 : strpos($chunk, $this->from);
			
			// No start marker in this chunk: skip it instead of returning
			// the whole chunk as a bogus slice.
			if($start === false)
			{
				continue;
			}
			
			$ret[] = substr($chunk, $start);
		}
		
		return $ret;
	}
}

/**
 * regexp rule Class - is a rule-type to manipulate data using
 * a regular expression and preg_match_all. 
 * 
 * For example you want to get href of any anchor
 * you use : new regexp('.*?<a.*href="([^"]*)"[^>]*>(.*?)</a>.*')
 *
 * The pattern is wrapped in '%' delimiters and run with the 'i' and 'm'
 * modifiers, so a literal '%' inside the pattern has to be escaped.
 * 
 * @todo Support for those regular expressions that return an array
 * 		 I use this rule-type in google-result-reader example, but in that
 * 		 example this rule returns only one item.
 * 
 * @package parsPipeCore
 */
class regexp
{
	private $refnum;
	private $pattr;
	
	/**
	 * @param string $pattr  Regular expression without delimiters.
	 * @param int    $refnum Number of the capture group to return
	 *                       (0 is the whole match).
	 */
	public function __construct($pattr, $refnum = 1)
	{
		$this->refnum = $refnum;
		$this->pattr = $pattr;
	}
	
	/**
	 * PHP 4 style constructor name. Methods named after the class are no
	 * longer treated as constructors since PHP 8, so the real work lives in
	 * __construct(); this stays as an ordinary method for code that calls
	 * it explicitly.
	 *
	 * @deprecated Use the constructor.
	 */
	public function regexp($pattr, $refnum = 1)
	{
		self::__construct($pattr, $refnum);
	}
	
	/**
	 * Match the pattern against the data.
	 *
	 * @param string $data The data to search in.
	 * @return array A one-element array holding the requested capture group
	 *               of the first match, or an empty array when the pattern
	 *               does not match (or the group does not exist).
	 */
	public function apply(&$data)
	{
		$matches = array();
		
		$found = preg_match_all('%'.$this->pattr.'%im', $data, $matches);
		
		// preg_match_all() gives 0 for "no match" and false on error; in
		// both cases there is nothing to return.
		if(!$found || !isset($matches[$this->refnum][0]))
		{
			return array();
		}
		
		// Default pattern order: $matches[group][n] is that group in the
		// n-th match, so index 0 is the first match.
		return array($matches[$this->refnum][0]);
	}
}

/**
 * css rule Class - is a powerful CSS selector rule
 * you can create rules as complicated as CSS rules.
 * This class is using phpQuery so you need to include its classes.
 * 
 * For example you want to get inside of any DIV which its class is news-item
 * you use : new css('div.news-item')
 * 
 * @package parsPipeCore
 */
class css
{
	private $selector;
	
	/**
	 * @param string $selector CSS selector handed to phpQuery.
	 */
	public function __construct($selector)
	{
		$this->selector = $selector;
	}
	
	/**
	 * PHP 4 style constructor name. Methods named after the class are no
	 * longer treated as constructors since PHP 8, so the real work lives in
	 * __construct(); this stays as an ordinary method for code that calls
	 * it explicitly.
	 *
	 * @deprecated Use the constructor.
	 */
	public function css($selector)
	{
		self::__construct($selector);
	}
	
	/**
	 * Select elements from the data with the CSS selector.
	 *
	 * @param string $data Markup that is loaded into a new phpQuery HTML document.
	 * @return array The html() output of every element matched by the
	 *               selector, in the order phpQuery yields them.
	 */
	public function apply(&$data)
	{
		$doc = phpQuery::newDocumentHTML($data);
		// Make the fresh document the selected one; presumably pq() below
		// resolves elements against it - confirm against phpQuery.
		phpQuery::selectDocument($doc);
		
		$ret = array();
		
		foreach($doc[$this->selector] as $element)
		{
			$ret[] = pq($element)->html();
		}
		
		return $ret;
	}
}

?>