Aşağıdaki get_keywords fonksiyonu, verilen metinde en çok geçen kelimeleri bulup sonucu meta keyword olarak geri döndürüyor. İkinci parametreye verilen değer, kaç keyword’ün geriye döndürüleceğini belirliyor.
<?php

/**
 * array_filter() callback: keep only words longer than three characters.
 *
 * Length is measured in UTF-8 characters so that short non-ASCII words
 * (three letters but four or more bytes) are rejected like ASCII ones.
 * When the string is not valid UTF-8 the byte length is used instead.
 *
 * @param string $item Candidate word.
 * @return bool True when the word should be kept.
 */
function filter($item)
{
    $length = preg_match_all('/./us', $item, $unused);
    if ($length === false) {
        // The /u modifier rejects invalid UTF-8; fall back to counting bytes.
        $length = strlen($item);
    }

    return $length > 3;
}

/**
 * Legacy array_map() callback: count substring occurrences of $item in the
 * global $content variable.
 *
 * Kept for backward compatibility only. get_keywords() no longer uses it,
 * because it depends on a global and counts substrings, not whole words.
 *
 * @param string $item Needle to look for.
 * @return array Pair of (occurrence count, $item).
 */
function strcount($item)
{
    global $content;

    return array(substr_count($content, $item), $item);
}

/**
 * Reduce an HTML document to text using regular expressions only.
 *
 * Script and style elements are removed together with their contents
 * before the generic tag pattern runs; otherwise the generic pattern
 * strips the <style> tags first and leaves the CSS behind as text.
 *
 * @param string $document HTML markup.
 * @return string Markup with scripts, styles, tags and leftover comments removed.
 */
function html2txt($document)
{
    $search = array(
        '@<script[^>]*?>.*?</script>@si',  // script elements including their code
        '@<style[^>]*?>.*?</style>@si',    // style elements including their CSS
        '@<[\/\!]*?[^<>]*?>@si',           // any remaining tag
        '@<![\s\S]*?--[ \t\n\r]*>@',       // leftover multi-line comments / CDATA
    );

    return preg_replace($search, '', $document);
}

/**
 * Run the strip_html_tags() regular expressions over $text.
 *
 * @param string $text HTML markup.
 * @param bool   $utf8 Whether to compile the patterns with the /u modifier.
 * @return string|null Processed markup, or null when PCRE reports an error
 *                     (for example invalid UTF-8 while $utf8 is true).
 */
function strip_html_tags_replace($text, $utf8)
{
    $unicode = $utf8 ? 'u' : '';
    $patterns = array();
    $replacements = array();

    // Elements whose content is never visible text: replace them by a space.
    $invisible = array(
        'head', 'style', 'script', 'object', 'embed',
        'applet', 'noframes', 'noscript', 'noembed',
    );
    foreach ($invisible as $tag) {
        $patterns[] = '@<' . $tag . '\b[^>]*?>.*?</' . $tag . '>@si' . $unicode;
        $replacements[] = ' ';
    }

    // Put a line break in front of block-level tags so that the words of
    // adjacent blocks do not get glued together once the tags are removed.
    $patterns[] = '@</?(?:address|blockquote|center|del'
        . '|div|h[1-9]|ins|isindex|p|pre'
        . '|dir|dl|dt|dd|li|menu|ol|ul'
        . '|table|th|td|caption'
        . '|form|button|fieldset|legend|input'
        . '|label|select|optgroup|option|textarea'
        . '|frameset|frame|iframe)@i' . $unicode;
    $replacements[] = "\n\$0";

    return preg_replace($patterns, $replacements, $text);
}

/**
 * Remove HTML tags, including invisible text such as style and script code
 * and embedded objects. Line breaks are added around block-level tags to
 * prevent word joining after tag removal.
 *
 * The patterns are first applied in UTF-8 mode. If that fails (typically
 * because the input is not valid UTF-8) they are retried in byte mode, so
 * the document is not silently turned into an empty string.
 *
 * @param string $text HTML markup.
 * @return string Text without tags.
 */
function strip_html_tags($text)
{
    $cleaned = strip_html_tags_replace($text, true);
    if ($cleaned === null) {
        $cleaned = strip_html_tags_replace($text, false);
    }
    if ($cleaned === null) {
        // Both passes failed; let strip_tags() do what it can on the input.
        $cleaned = $text;
    }

    return strip_tags($cleaned);
}

/**
 * Split plain text into words.
 *
 * A word is a run of Unicode letters that may contain single apostrophes
 * or hyphens between letters. When the text is not valid UTF-8 the
 * byte-oriented str_word_count() is used instead.
 *
 * @param string $text Plain text.
 * @return array List of words in order of appearance.
 */
function get_keywords_extract_words($text)
{
    $found = preg_match_all('/\p{L}+(?:[\'-]\p{L}+)*/u', $text, $matches);
    if ($found === false) {
        return str_word_count($text, 1);
    }

    return $matches[0];
}

/**
 * Find the most frequent words of an HTML document and return them as a
 * comma-separated list suitable for a meta keywords tag.
 *
 * Words of three characters or fewer are ignored (see filter()). Counting
 * is done on whole words of the stripped text passed in, is case-sensitive,
 * and does not depend on any global variable. Words with the same frequency
 * are ordered by descending byte-wise string order so the result is
 * deterministic.
 *
 * @param string $content HTML markup to analyse.
 * @param int    $count   Maximum number of keywords to return.
 * @return string Keywords joined by ",", or "" when there are none.
 */
function get_keywords($content, $count)
{
    $text = html_entity_decode(strip_html_tags($content), ENT_QUOTES, 'UTF-8');
    $words = array_filter(get_keywords_extract_words($text), 'filter');
    if (!$words) {
        return '';
    }

    $tally = array_count_values($words);
    $terms = array_map('strval', array_keys($tally));
    $hits = array_values($tally);

    // Primary key: frequency, highest first. Secondary key: the word itself.
    array_multisort($hits, SORT_DESC, SORT_NUMERIC, $terms, SORT_DESC, SORT_STRING);

    return implode(',', array_slice($terms, 0, max(0, (int) $count)));
}

// Demo: print the five most frequent words of the page.
// A timeout keeps the script from hanging on an unresponsive host, and the
// result is checked because file_get_contents() returns false on failure.
$context = stream_context_create(array('http' => array('timeout' => 10)));
$content = file_get_contents('https://www.tankado.com/', false, $context);
if ($content !== false) {
    echo get_keywords($content, 5);
}
