欢迎各位兄弟 发布技术文章

这里的技术是共享的

You are here

判断来自哪个搜索引擎 进行统计 有大用 有大大用

searchengine.js
// Reports visits that arrive from a search engine (Google, Baidu, Sogou, 360)
// by POSTing the referrer URL to /php/searchengine.php for statistics.
$(document).ready(function () {
    var shangyigeurl = document.referrer;

    // Direct visits (or browsers that strip the referrer) have nothing to report.
    if (!shangyigeurl) {
        return;
    }

    // Look only at the host part, so an engine name that merely appears in the
    // path or query string of an unrelated referrer does not count.
    var hostMatch = /^[a-z][a-z0-9+.\-]*:\/\/([^\/?#]+)/i.exec(shangyigeurl);
    if (!hostMatch) {
        return;
    }
    var host = hostMatch[1]
        .toLowerCase()
        .replace(/^.*@/, '')   // drop optional userinfo
        .replace(/:\d+$/, ''); // drop optional port

    // Wrapping host and domain in dots makes this a whole-label comparison:
    // "www.google.com.hk" matches "google.com", but "also.com" does not match "so.com".
    var dottedHost = '.' + host + '.';
    var engineDomains = ['google.com', 'baidu.com', 'sogou.com', 'so.com'];
    var fromSearchEngine = false;
    for (var i = 0; i < engineDomains.length; i++) {
        if (dottedHost.indexOf('.' + engineDomains[i] + '.') !== -1) {
            fromSearchEngine = true;
            break;
        }
    }
    if (!fromSearchEngine) {
        return;
    }

    // Best-effort logging: a failure here must never disturb the visitor,
    // so no error popup and no success handler.
    $.ajax({
        type: "POST",
        url: "/php/searchengine.php",
        cache: false,
        data: {"shangyigeurl": shangyigeurl}
    });
});

searchengine.php

<?php

require_once(dirname(__FILE__)."/config.php");

// GetIP() is not defined in this file; presumably it is provided by config.php.
$userip = GetIP();

// The referrer is posted by searchengine.js (document.referrer of the landing
// page). $_SERVER['HTTP_REFERER'] is not used here because for the AJAX request
// it would describe the page issuing the request, not the search engine.
$url = (isset($_POST['shangyigeurl']) && is_string($_POST['shangyigeurl']))
    ? $_POST['shangyigeurl']
    : '';

// Match the engine against the host only, so that an engine name inside the
// path or query string of an unrelated URL is not counted.
$host = parse_url($url, PHP_URL_HOST);
if (!is_string($host)) {
    $host = '';
}

$search_1 = "google.com"; // keyword parameter: q=
$search_2 = "baidu.com";  // keyword parameter: wd= (sometimes word=)
$search_3 = "sogou.com";  // Sogou, keyword parameter: query=
$search_4 = "so.com";     // 360, keyword parameter: q=

// preg_quote() keeps the dot literal; "i" makes the host comparison case-insensitive.
$google = preg_match('/\b' . preg_quote($search_1, '/') . '\b/i', $host);
$baidu  = preg_match('/\b' . preg_quote($search_2, '/') . '\b/i', $host);
$sogou  = preg_match('/\b' . preg_quote($search_3, '/') . '\b/i', $host);
$so     = preg_match('/\b' . preg_quote($search_4, '/') . '\b/i', $host);

$s_s_keyword = "";
$yingQing = '';
$kwParam = '';

// Pick the engine label and the query parameter that carries the keyword.
if ($google) {
    $yingQing = '谷歌';
    $kwParam = 'q=';
} else if ($baidu) {
    // Baidu uses either "word=" or "wd=" depending on the entry point.
    $yingQing = '百度';
    $kwParam = (stripos($url, 'word=') !== false) ? 'word=' : 'wd=';
} else if ($sogou) {
    $yingQing = '搜狗';
    $kwParam = 'query=';
} else if ($so) {
    $yingQing = '360';
    $kwParam = 'q=';
}

// Extract the keyword only when the parameter is really present; many engines
// send a bare referrer such as "https://www.google.com/" without any query.
if ($kwParam !== '' && stripos($url, $kwParam) !== false) {
    $s_s_keyword = urldecode(get_keyword($url, $kwParam));
    if (isUtf8($s_s_keyword)) {
        // The log is stored as GBK. If the conversion fails, keep the
        // original bytes instead of losing the keyword.
        $converted = iconv("UTF-8", "GBK", $s_s_keyword);
        if ($converted !== false) {
            $s_s_keyword = $converted;
        }
    }
}

// Persist the keyword for this visitor.
if ($google || $baidu || $sogou || $so)
{
    $currTime = time();
    // The IP may originate from client-controlled headers (depends on GetIP());
    // restrict it to address characters so it cannot alter the target path.
    $safeIp = preg_replace('/[^0-9A-Fa-f.:]/', '_', (string)$userip);
    $path = 'baoming/'.$safeIp.'-key.txt';
    makeCache($currTime,$path,$yingQing.':'.$s_s_keyword."  从搜索引擎网址:".$url);
}

/**
 * Writes one record "<timestamp>,<text>" to the given file, replacing any
 * previous content. Terminates the script if the file cannot be opened.
 *
 * @param int    $currTime               timestamp placed before the comma
 * @param string $cacheFile              path of the file to (over)write
 * @param string $yingQing_s_s_keyword   text placed after the comma
 */
function makeCache($currTime, $cacheFile, $yingQing_s_s_keyword)
{
    // Mode 'w' truncates: only the most recent record is kept per file.
    $handle = @fopen($cacheFile, 'w');
    if (!$handle) {
        die('更新文件失败');
    }

    $record = implode(',', array($currTime, $yingQing_s_s_keyword));
    fwrite($handle, $record);
    fclose($handle);
}


/**
 * Extracts the (still URL-encoded) search keyword from a search-engine
 * referrer URL.
 *
 * @param string $url       referrer URL
 * @param string $kw_start  marker preceding the keyword, e.g. "q=" or "wd="
 * @return string the text between the marker and the next "&" or "#";
 *                an empty string when the marker is absent or has no value
 */
function get_keyword($url,$kw_start)
{
    $url = (string)$url;
    $kw_start = (string)$kw_start;
    if ($kw_start === '') {
        return '';
    }

    // Prefer an occurrence that starts a parameter (at the beginning or right
    // after "?", "&" or "#"), so "q=" is not mistaken for the tail of "oq=".
    // Fall back to the first plain occurrence for markers such as "?q=".
    $firstHit = false;
    $hit = false;
    $offset = 0;
    while (($pos = stripos($url, $kw_start, $offset)) !== false) {
        if ($firstHit === false) {
            $firstHit = $pos;
        }
        if ($pos === 0 || strpos('?&#', $url[$pos - 1]) !== false) {
            $hit = $pos;
            break;
        }
        $offset = $pos + 1;
    }
    if ($hit === false) {
        $hit = $firstHit;
    }
    if ($hit === false) {
        // Marker not present at all: there is no keyword to report.
        return '';
    }

    // Casts guard against substr() returning false on older PHP versions.
    $rest = (string)substr($url, $hit + strlen($kw_start));

    // The value ends at the next parameter separator or at the fragment.
    return (string)substr($rest, 0, strcspn($rest, '&#'));
}

/**
 * Tells whether a string looks like UTF-8, based on utf8_probability().
 *
 * @param string $str  raw bytes to inspect
 * @return bool|int true when the score is above 90, otherwise 0
 */
function isUtf8($str)
{
    return (utf8_probability($str) > 90) ? true : 0;
}

/**
 * Estimates how likely a byte string is UTF-8 encoded.
 *
 * Counts the non-ASCII bytes that belong to well-formed 2-, 3- or 4-byte
 * UTF-8 sequences and expresses them as a percentage of all non-ASCII bytes.
 * Low scores are reduced to 0 to avoid coincidental matches.
 *
 * @param string $rawtextstr  raw bytes to inspect
 * @return int 0 for pure ASCII, empty or unconvincing input; otherwise the
 *             score (above 98, or above 95 when more than 30 good bytes were seen)
 */
function utf8_probability($rawtextstr) {
    $rawtextstr = (string)$rawtextstr;
    $rawtextlen = strlen($rawtextstr);
    $goodbytes = 0;
    $asciibytes = 0;

    for ($i = 0; $i < $rawtextlen; $i++) {
        $lead = ord($rawtextstr[$i]);

        if ($lead < 0x80) {
            // ASCII is valid in almost every encoding, so it carries no signal.
            $asciibytes++;
            continue;
        }

        // Number of continuation bytes announced by the lead byte.
        if ($lead >= 0xC0 && $lead <= 0xDF) {
            $need = 1;
        } else if ($lead >= 0xE0 && $lead <= 0xEF) {
            $need = 2;
        } else if ($lead >= 0xF0 && $lead <= 0xF4) {
            $need = 3; // 4-byte sequences (code points beyond U+FFFF, e.g. emoji)
        } else {
            continue; // stray continuation byte or invalid lead byte
        }

        // The sequence must fit entirely inside the string.
        if ($i + $need >= $rawtextlen) {
            continue;
        }

        $wellFormed = true;
        for ($k = 1; $k <= $need; $k++) {
            $cont = ord($rawtextstr[$i + $k]);
            if ($cont < 0x80 || $cont > 0xBF) {
                $wellFormed = false;
                break;
            }
        }

        if ($wellFormed) {
            $goodbytes += $need + 1;
            $i += $need; // skip the continuation bytes just consumed
        }
    }

    // ASCII is a subset of UTF-8: nothing to score.
    if ($asciibytes == $rawtextlen) {
        return 0;
    }

    $score = (int)(100 * ($goodbytes/($rawtextlen-$asciibytes)));

    if ($score > 98) {
        return $score;
    }
    if ($score > 95 && $goodbytes > 30) {
        // Tolerates a few malformed sequences in longer texts.
        return $score;
    }
    return 0;
}
?>


普通分类: