Aşağıdaki get_keywords fonksiyonu verilen metinde ençok geçen kelimeleri bulup sonucu meta keyword olarak geri döndürüyor. İkinci parametreye verilen değer kaç keyword’ün geriye döndürüleceğini belirliyor.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
<?php function filter( $item ) { if (strlen($item) <= 3) return false; return true; } function strcount( $item ) { GLOBAL $content; return Array(substr_count($content, $item), $item); } function html2txt($document){ $search = array('@<script[^>]*?>.*?</script>@si', // Strip out javascript '@<[\/\!]*?[^<>]*?>@si', // Strip out HTML tags '@<style[^>]*?>.*?</style>@siU', // Strip style tags properly '@<![\s\S]*?--[ \t\n\r]*>@' // Strip multi-line comments including CDATA ); $text = preg_replace($search, '', $document); return $text; } /** * Remove HTML tags, including invisible text such as style and * script code, and embedded objects. Add line breaks around * block-level tags to prevent word joining after tag removal. */ function strip_html_tags( $text ) { $text = preg_replace( array( // Remove invisible content '@<head[^>]*?>.*?</head>@siu', '@<style[^>]*?>.*?</style>@siu', '@<script[^>]*?.*?</script>@siu', '@<object[^>]*?.*?</object>@siu', '@<embed[^>]*?.*?</embed>@siu', '@<applet[^>]*?.*?</applet>@siu', '@<noframes[^>]*?.*?</noframes>@siu', '@<noscript[^>]*?.*?</noscript>@siu', '@<noembed[^>]*?.*?</noembed>@siu', // Add line breaks before and after blocks '@</?((address)|(blockquote)|(center)|(del))@iu', '@</?((div)|(h[1-9])|(ins)|(isindex)|(p)|(pre))@iu', '@</?((dir)|(dl)|(dt)|(dd)|(li)|(menu)|(ol)|(ul))@iu', '@</?((table)|(th)|(td)|(caption))@iu', '@</?((form)|(button)|(fieldset)|(legend)|(input))@iu', '@</?((label)|(select)|(optgroup)|(option)|(textarea))@iu', '@</?((frameset)|(frame)|(iframe))@iu', ), array( ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0", ), $text ); return strip_tags( $text ); } function get_keywords($content, $count) { $content = strip_html_tags($content); $content = html_entity_decode($content); $arr = str_word_count( $content, 1); $arr = array_filter($arr, 'filter'); $arr = array_unique($arr); $arr = array_map('strcount', $arr); arsort($arr); foreach($arr as $item) { if ($i < $count) $ret_arr[] = $item[1]; $i++; } return implode(',', $ret_arr); } $content = file_get_contents('http://www.tankado.com/'); echo get_keywords($content, 5); ?> |