Metinden Anahtar Kelime (keyword) Üretme

3
330

Aşağıdaki get_keywords fonksiyonu, verilen metinde en çok geçen kelimeleri bulup sonucu meta keyword olarak geri döndürüyor. İkinci parametreye verilen değer, kaç keyword’ün geriye döndürüleceğini belirliyor.

<?php
	/**
	 * array_filter() callback: keeps only items longer than three bytes.
	 *
	 * @param string $item Candidate word.
	 * @return bool True when strlen($item) is greater than 3, false otherwise.
	 */
	function filter( $item )
	{
		return strlen($item) > 3;
	}
	
	/**
	 * array_map() callback: pairs a string with the number of times it
	 * occurs in the global variable $content.
	 *
	 * @param string $item Substring to look for.
	 * @return array Two elements: the substr_count() result, then $item itself.
	 */
	function strcount( $item )
	{
		// The haystack is not a parameter; it is read from global scope.
		global $content;
		$occurrences = substr_count($content, $item);
		return array($occurrences, $item);
	}
	
	/**
	 * Reduce an HTML document to text by deleting script and style elements
	 * (together with their contents), every remaining tag, and "<! ... -->"
	 * blocks.
	 *
	 * @param string $document HTML markup.
	 * @return string $document with every match of the patterns below removed.
	 */
	function html2txt($document){
		// Order matters: preg_replace() applies the patterns one after another.
		// <style> elements have to be removed before the generic tag pattern,
		// otherwise only the <style>/</style> tags go and the CSS text stays.
		// The style pattern deliberately has no U modifier: U would invert the
		// lazy ".*?" into a greedy match spanning several style elements.
		$search = array('@<script[^>]*?>.*?</script>@si',  // Strip out javascript
					   '@<style[^>]*?>.*?</style>@si',     // Strip style elements including their CSS
					   '@<[\/\!]*?[^<>]*?>@si',            // Strip out remaining HTML tags
					   '@<![\s\S]*?--[ \t\n\r]*>@'         // Strip multi-line "<! ... -->" blocks still left
		);
		$text = preg_replace($search, '', $document);
		return $text;
	} 
	
	/**
	 * Remove HTML tags, including invisible text such as style and
	 * script code, and embedded objects.  Add line breaks around
	 * block-level tags to prevent word joining after tag removal.
	 *
	 * The patterns are applied in UTF-8 mode (/u) first.  preg_replace()
	 * returns null when the subject is not valid UTF-8, so in that case the
	 * same patterns are retried byte-wise instead of silently losing the
	 * whole text.
	 *
	 * @param string $text HTML markup.
	 * @return string The text with all tags removed.
	 */
	function strip_html_tags( $text )
	{
		// Elements whose content is not visible text; each match becomes a space.
		$invisible = array(
			'@<head[^>]*?>.*?</head>@si',
			'@<style[^>]*?>.*?</style>@si',
			'@<script[^>]*?.*?</script>@si',
			'@<object[^>]*?.*?</object>@si',
			'@<embed[^>]*?.*?</embed>@si',
			'@<applet[^>]*?.*?</applet>@si',
			'@<noframes[^>]*?.*?</noframes>@si',
			'@<noscript[^>]*?.*?</noscript>@si',
			'@<noembed[^>]*?.*?</noembed>@si',
		);
		// Start of opening/closing block-level tags; a newline is put in front
		// of each match so neighbouring words do not run together.
		$blocks = array(
			'@</?((address)|(blockquote)|(center)|(del))@i',
			'@</?((div)|(h[1-9])|(ins)|(isindex)|(p)|(pre))@i',
			'@</?((dir)|(dl)|(dt)|(dd)|(li)|(menu)|(ol)|(ul))@i',
			'@</?((table)|(th)|(td)|(caption))@i',
			'@</?((form)|(button)|(fieldset)|(legend)|(input))@i',
			'@</?((label)|(select)|(optgroup)|(option)|(textarea))@i',
			'@</?((frameset)|(frame)|(iframe))@i',
		);

		$patterns = array_merge($invisible, $blocks);
		// One replacement per pattern, derived from the lists so they cannot
		// drift out of step.
		$replacements = array_merge(
			array_fill(0, count($invisible), ' '),
			array_fill(0, count($blocks), "\n\$0")
		);

		// Same patterns with the u modifier appended (UTF-8 aware matching).
		$utf8_patterns = array();
		foreach ($patterns as $pattern)
		{
			$utf8_patterns[] = $pattern . 'u';
		}

		$result = preg_replace($utf8_patterns, $replacements, $text);
		if ($result === null)
		{
			// Subject rejected in UTF-8 mode (e.g. a single-byte encoded page):
			// retry without /u, treating the text as plain bytes.
			$result = preg_replace($patterns, $replacements, $text);
		}
		if ($result === null)
		{
			// Still failing (some other PCRE error): let strip_tags() do
			// what it can with the unmodified input.
			$result = $text;
		}
		return strip_tags( $result );
	}	
	
	/**
	 * Build a comma-separated keyword list from the most frequent words of
	 * an HTML document.
	 *
	 * The markup is stripped and entities are decoded, then the text is split
	 * with str_word_count().  Words rejected by filter() are dropped, and each
	 * remaining distinct word is scored with substr_count() against the
	 * cleaned text, so occurrences inside longer words are counted as well.
	 *
	 * @param string $content HTML (or plain text) to analyse.
	 * @param int    $count   Maximum number of keywords to return.
	 * @return string Keywords joined with ',', highest score first; an empty
	 *                string when no word qualifies or $count is not positive.
	 */
	function get_keywords($content, $count)
	{
		$content = strip_html_tags($content);
		$content = html_entity_decode($content);

		$words = str_word_count( $content, 1);
		$words = array_unique(array_filter($words, 'filter'));

		// Score against the cleaned local text, not against whatever global
		// variable the caller happens to have.
		$ranked = array();
		foreach ($words as $word)
		{
			$ranked[] = array(substr_count($content, $word), $word);
		}
		// Arrays compare element by element: score first, then the word,
		// both descending.
		rsort($ranked);

		$keywords = array();
		foreach (array_slice($ranked, 0, max(0, (int) $count)) as $pair)
		{
			$keywords[] = $pair[1];
		}
		return implode(',', $keywords);
	}
	
	// Example: print the five top keywords of a live page.
	// The variable has to stay a global named $content because strcount()
	// reads a global of exactly that name.
	$content = file_get_contents('http://www.tankado.com/');
	// file_get_contents() returns false when the page cannot be fetched;
	// there is nothing to analyse in that case.
	if ($content !== false)
	{
		echo get_keywords($content, 5);
	}
?>

CEVAP VER

This site uses Akismet to reduce spam. Learn how your comment data is processed.