ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

百度统计API接口Python简易SDK

2021-10-10 13:34:05  阅读:294  来源: 互联网

标签:count Python visitor site API params date id SDK


由于太久没有动笔,所以决定瞎发点东西。最近在接入百度统计API,虽然没有对响应结果做进一步处理,但还是希望能够减少大家的工作量。

百度商业账号接口未对接,百度普通账号所有接口已接入。本SDK基于第三方库httpx,所有方法都是异步调用的。
如果想要快速改成同步调用,只需替换如下几个关键字:

  1. "AsyncClient" -> "Client"
  2. "async " -> ""
  3. "await " -> ""


from httpx import AsyncClient, Response


class BaiduConfig:
    """Credentials and OAuth tokens used by the Baidu Tongji client.

    The values below are placeholders that must be replaced with real ones.
    docs https://tongji.baidu.com/api/manual/Chapter2/openapi.html
    """

    # Application credentials (placeholder text: "apply for your own").
    CLIENT_ID, CLIENT_SECRET = '自行申请', '自行申请'

    # OAuth tokens (placeholder text: "see the Baidu authorization docs").
    ACCESS_TOKEN = '见百度授权文档'
    REFRESH_TOKEN = '见百度授权文档'


class BaiduStat(AsyncClient):
    """Asynchronous client for the Baidu Tongji (analytics) open API.

    Every report method issues a GET request carrying the access token from
    ``BaiduConfig`` and returns the decoded JSON body without further
    processing.  When a response carries ``error_code`` 111 (access token
    expired) the token is refreshed and the request is retried once.

    docs https://tongji.baidu.com/api/manual/
    """

    # Cached site id; filled lazily by ``get_site_id``.
    site_id: int = None

    # Metric key -> human readable (Chinese) label.
    fields = {
        # NOTE(review): the label for 'visit_count' reads "source" while the
        # key suggests a visit counter - confirm against the API manual.
        'pv_count': '浏览量(PV)', 'pv_ratio': '浏览量占比', 'visit_count': '来源',
        'visitor_count': '访客数(UV)', 'new_visitor_count': '新访客数',
        'new_visitor_ratio': '新访客比率', 'ip_count': 'IP 数',
        'bounce_ratio': '跳出率', 'avg_visit_time': '平均访问时长',
        'avg_visit_pages': '平均访问页数', 'trans_count': '转化次数',
        'trans_ratio': '转化率', 'visit1_count': '入口页次数',
        'outward_count': '贡献下游浏览量', 'exit_count': '退出页次数',
        'average_stay_time': '平均停留时长', 'exit_ratio': '退出率',
        'out_pv_count': '贡献浏览量',
    }

    # Region name (Chinese) -> value sent as the ``area`` filter.
    # NOTE(review): the values contain a space after the comma whereas the
    # method docstrings show "province,4" - confirm which form the API wants.
    area = {
        '全国': 'china', '北京': 'province, 1', '上海': 'province, 2',
        '天津': 'province, 3', '广东': 'province, 4', '福建': 'province, 5',
        '海南': 'province, 8', '安徽': 'province, 9', '贵州': 'province, 10',
        '甘肃': 'province, 11', '广西': 'province, 12', '河北': 'province, 13',
        '河南': 'province, 14', '黑龙江': 'province, 15', '湖北': 'province, 16',
        '湖南': 'province, 17', '吉林': 'province, 18', '江苏': 'province, 19',
        '江西': 'province, 20', '辽宁': 'province, 21', '内蒙古': 'province, 22',
        '宁夏': 'province, 23', '青海': 'province, 24', '山东': 'province, 25',
        '山西': 'province, 26', '陕西': 'province, 27', '四川': 'province, 28',
        '西藏': 'province, 29', '新疆': 'province, 30', '云南': 'province, 31',
        '浙江': 'province, 32', '重庆': 'province, 33', '香港': 'province, 34',
        '台湾': 'province, 35', '澳门': 'province, 36'
    }

    # Endpoint shared by every report method.
    _REPORT_URL = 'https://openapi.baidu.com/rest/2.0/tongji/report/getData'

    # Metric set shared by the source/* and custom/media reports.
    _SOURCE_METRICS = (
        'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
        'new_visitor_count', 'new_visitor_ratio', 'ip_count',
        'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
        'trans_count', 'trans_ratio',
    )

    # Metric set shared by the visit/district and visit/world reports.
    _REGION_METRICS = (
        'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
        'new_visitor_count', 'new_visitor_ratio', 'ip_count',
        'bounce_ratio', 'average_stay_time', 'avg_visit_pages',
        'trans_count', 'trans_ratio',
    )

    @staticmethod
    def _token_expired(resp):
        """Return True when *resp* is a JSON object with ``error_code`` 111."""
        try:
            payload = resp.json()
        except ValueError:
            # Not a JSON body, so it cannot be the token-expired error.
            return False
        return isinstance(payload, dict) and payload.get('error_code') == 111

    async def get(self, url, *args, **kwargs) -> Response:
        """Send a GET request, refreshing the access token once if expired.

        :param url: request URL
        :param args: forwarded to ``AsyncClient.get``
        :param kwargs: forwarded to ``AsyncClient.get``
        :return: the response of the first request, or of the retry when the
                 first one reported an expired access token
        """
        resp = await super().get(url, *args, **kwargs)
        if self._token_expired(resp):  # Access token expired
            await self.refresh_token()
            params = kwargs.get('params')
            if isinstance(params, dict) and 'access_token' in params:
                # The caller built ``params`` with the old token; retrying
                # with it unchanged would just fail again.
                kwargs['params'] = {
                    **params, 'access_token': BaiduConfig.ACCESS_TOKEN,
                }
            resp = await super().get(url, *args, **kwargs)
        return resp

    @staticmethod
    def _persist_tokens(access_token, refresh_token):
        """Rewrite the token assignments in this source file.

        Only the first quoted assignment of each of ``ACCESS_TOKEN`` and
        ``REFRESH_TOKEN`` is rewritten.  Matching by name (not by old value)
        keeps the two apart even when both hold the same placeholder text.

        :raises OSError: when the source file cannot be read or written
        """
        import re

        with open(__file__, encoding='utf-8') as f:
            original = f.read()
        updated = original
        for name, value in (('ACCESS_TOKEN', access_token),
                            ('REFRESH_TOKEN', refresh_token)):
            pattern = re.compile(
                r"^([ \t]*%s[ \t]*(?::[^=\n]+)?=[ \t]*)(['\"]).*?\2" % name,
                re.MULTILINE)
            # A callable replacement avoids backslash processing of the value.
            updated = pattern.sub(
                lambda m, v=value: m.group(1) + repr(v), updated, count=1)
        if updated != original:
            # Write only after the new content is fully computed so a failure
            # above never leaves a truncated file behind.
            with open(__file__, 'w', encoding='utf-8') as f:
                f.write(updated)

    async def refresh_token(self):
        """Exchange the refresh token for a new access/refresh token pair.

        On success the new tokens are stored on ``BaiduConfig`` and written
        back into this source file.  Nothing happens when the response lacks
        either token.
        """
        url = 'https://openapi.baidu.com/oauth/2.0/token'
        params = {
            'grant_type': 'refresh_token', 'client_id': BaiduConfig.CLIENT_ID,
            'client_secret': BaiduConfig.CLIENT_SECRET,
            'refresh_token': BaiduConfig.REFRESH_TOKEN,
        }
        # Use the parent ``get`` so the expiry check in the override cannot
        # re-enter this method.
        payload = (await super().get(url, params=params)).json()
        access_token = payload.get('access_token')
        refresh_token = payload.get('refresh_token')
        if not (access_token and refresh_token):
            return
        # Update the in-memory tokens first so the running process keeps
        # working even if persisting them to disk fails.
        BaiduConfig.ACCESS_TOKEN = access_token
        BaiduConfig.REFRESH_TOKEN = refresh_token
        self._persist_tokens(access_token, refresh_token)

    async def get_site_list(self):
        """Fetch the site list of the account.

        :return: decoded JSON response
        """
        url = 'https://openapi.baidu.com/rest/2.0/tongji/config/getSiteList'
        params = {'access_token': BaiduConfig.ACCESS_TOKEN}
        resp = await self.get(url, params=params)
        return resp.json()

    async def get_site_id(self, domain=None):
        """Return the site id, looking it up on first use.

        The result is cached on ``self.site_id``; once it is set, later calls
        return it without another lookup, whatever ``domain`` is passed.

        :param domain: domain to look for; the first site is used when omitted
        :return: the site id, or None when no site matched
        """
        if self.site_id is None:
            listing = await self.get_site_list()
            for site in listing.get('list', []):
                if domain is None or site['domain'] == domain:
                    self.site_id = site['site_id']
                    break
        return self.site_id

    async def _fetch_report(self, method, metrics, site_id=None,
                            date_range=None, **filters):
        """Request one report from the ``getData`` endpoint.

        :param method: report name sent as the ``method`` parameter
        :param metrics: iterable of metric names, joined with commas
        :param site_id: site ID; looked up via ``get_site_id()`` when None
        :param date_range: "start-end" string such as "20211001-20211007"
        :param filters: optional query parameters; falsy values are omitted
        :return: decoded JSON response
        :raises ValueError: when ``date_range`` is not two parts joined by "-"
        """
        if site_id is None:
            site_id = await self.get_site_id()
        params = {
            'site_id': site_id, 'method': method,
            'access_token': BaiduConfig.ACCESS_TOKEN,
            'metrics': ','.join(metrics),
        }
        params.update(
            (key, value) for key, value in filters.items() if value)
        if date_range:
            try:
                start_date, end_date = date_range.split('-')
            except ValueError:
                raise ValueError(
                    'date_range must look like "20211001-20211007", got %r'
                    % (date_range,)) from None
            params['start_date'], params['end_date'] = start_date, end_date
        resp = await self.get(self._REPORT_URL, params=params)
        return resp.json()

    def _area_filter(self, area):
        """Map a region name from ``self.area`` to its API value.

        Values not found in the table are passed through unchanged; a falsy
        value yields None so the filter is omitted.
        """
        return self.area.get(area, area) if area else None

    async def get_time_trend_report(self, site_id=None, date_range=None):
        """Fetch the site trend overview (``overview/getTimeTrendRpt``).

        Requests page views, visitors, IP count, bounce ratio, average visit
        time and conversion count.

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :return: decoded JSON response
        """
        metric = (
            'pv_count', 'visitor_count', 'ip_count', 'bounce_ratio',
            'avg_visit_time', 'trans_count',
        )
        return await self._fetch_report(
            'overview/getTimeTrendRpt', metric, site_id, date_range)

    async def get_district_report(self, site_id=None, date_range=None):
        """Fetch the visitor region overview (``overview/getDistrictRpt``).

        Only the ``pv_count`` metric is requested.

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'overview/getDistrictRpt', ('pv_count',), site_id, date_range)

    async def get_common_track_report(self, site_id=None, date_range=None):
        """Fetch the visitor source overview (``overview/getCommonTrackRpt``).

        Only the ``pv_count`` metric is requested.

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'overview/getCommonTrackRpt', ('pv_count',), site_id, date_range)

    async def get_trend_analyse(self, site_id=None, date_range=None, gran=None,
                                source=None, clientDevice=None, area=None,
                                visitor=None):
        """Fetch the trend analysis report (``trend/time/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param gran: time granularity; one of day/hour/week/month
        :param source: source filter; options
                    through     direct visit
                    search,0    all search engines
                    link        external link
        :param clientDevice: device filter; options
                    pc          computer
                    mobile      mobile device
        :param area: region filter (province level); a name from
                    ``self.area`` such as "广东" or the raw API value
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        metric = (
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'avg_visit_time', 'avg_visit_pages', 'trans_count', 'trans_ratio',
            'avg_trans_cost', 'income',
        )
        return await self._fetch_report(
            'trend/time/a', metric, site_id, date_range,
            area=self._area_filter(area), source=source,
            clientDevice=clientDevice, visitor=visitor, gran=gran)

    async def get_latest_visit(self, site_id=None, date_range=None,
                               source=None, clientDevice=None,
                               visitor=None, area=None):
        """Fetch the real-time visitor report (``trend/latest/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter; options
                    through     direct visit
                    search,0    all search engines
                    link        external link
        :param clientDevice: device filter; options
                    pc          computer
                    mobile      mobile device
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :param area: region filter (province level); a name from
                    ``self.area`` such as "广东" or the raw API value
        :return: decoded JSON response
        """
        metric = (
            'start_time', 'area', 'source', 'access_page', 'keyword',
            'searchword', 'is_ad', 'visitorId', 'ip', 'visit_time',
            'visit_pages',
        )
        return await self._fetch_report(
            'trend/latest/a', metric, site_id, date_range,
            source=source, clientDevice=clientDevice, visitor=visitor,
            area=self._area_filter(area))

    async def get_source_all(self, site_id=None, date_range=None,
                             viewType=None, clientDevice=None, visitor=None):
        """Fetch the all-sources report (``source/all/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param viewType: grouping; options
                    type        by source type
                    site        by source site
        :param clientDevice: device filter; options
                    pc          computer
                    mobile      mobile device
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'source/all/a', self._SOURCE_METRICS, site_id, date_range,
            clientDevice=clientDevice, visitor=visitor, viewType=viewType)

    async def get_source_engine(self, site_id=None, date_range=None,
                                clientDevice=None, area=None, visitor=None):
        """Fetch the search-engine source report (``source/engine/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param clientDevice: device filter; options
                    pc          computer
                    mobile      mobile device
        :param area: region filter (province level); a name from
                    ``self.area`` such as "广东" or the raw API value
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'source/engine/a', self._SOURCE_METRICS, site_id, date_range,
            clientDevice=clientDevice, area=self._area_filter(area),
            visitor=visitor)

    async def get_source_keyword(self, site_id=None, date_range=None,
                                 source=None, clientDevice=None, visitor=None):
        """Fetch the search-word source report (``source/searchword/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter; options
                    through     direct visit
                    search,0    all search engines
                    link        external link
        :param clientDevice: device filter; options
                    pc          computer
                    mobile      mobile device
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'source/searchword/a', self._SOURCE_METRICS, site_id, date_range,
            source=source, clientDevice=clientDevice, visitor=visitor)

    async def get_source_link(self, site_id=None, date_range=None,
                              viewType=None, domainType=None,
                              clientDevice=None, visitor=None):
        """Fetch the external-link source report (``source/link/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param viewType: grouping; options
                    domain      by domain
                    url         by URL
        :param domainType: domain filter; options
                    1           social media
                    2           navigation sites
                    4           e-mail
        :param clientDevice: device filter; options
                    pc          computer
                    mobile      mobile device
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'source/link/a', self._SOURCE_METRICS, site_id, date_range,
            viewType=viewType, domainType=domainType,
            clientDevice=clientDevice, visitor=visitor)

    async def get_custom_media(self, site_id=None, date_range=None, flag=None):
        """Fetch the designated ad tracking report (``custom/media/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param flag: dimension to report on; options
                    from        source
                    plan        plan
                    unit        unit
                    word        keyword
                    idea        creative
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'custom/media/a', self._SOURCE_METRICS, site_id, date_range,
            flag=flag)

    async def get_visit_top_page(self, site_id=None, date_range=None,
                                 source=None, visitor=None):
        """Fetch the visited pages report (``visit/toppage/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter; options
                    through     direct visit
                    search,0    all search engines
                    link        external link
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        metric = (
            'pv_count', 'visitor_count', 'ip_count', 'visit1_count',
            'outward_count', 'exit_count', 'average_stay_time', 'exit_ratio',
        )
        return await self._fetch_report(
            'visit/toppage/a', metric, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_landing_page(self, site_id=None, date_range=None):
        """Fetch the landing pages report (``visit/landingpage/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :return: decoded JSON response
        """
        metric = (
            'visit_count', 'visitor_count', 'new_visitor_count',
            'new_visitor_ratio', 'ip_count', 'out_pv_count',
            'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio',
        )
        return await self._fetch_report(
            'visit/landingpage/a', metric, site_id, date_range)

    async def get_visit_top_domain(self, site_id=None, date_range=None,
                                   source=None, visitor=None):
        """Fetch the visited domains report (``visit/topdomain/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter; options
                    through     direct visit
                    search,0    all search engines
                    link        external link
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        metric = (
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'average_stay_time', 'avg_visit_pages',
        )
        return await self._fetch_report(
            'visit/topdomain/a', metric, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_district(self, site_id=None, date_range=None,
                                 source=None, visitor=None):
        """Fetch the region distribution by province (``visit/district/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter; options
                    through     direct visit
                    search,0    all search engines
                    link        external link
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'visit/district/a', self._REGION_METRICS, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_world(self, site_id=None, date_range=None,
                              source=None, visitor=None):
        """Fetch the region distribution by country (``visit/world/a``).

        :param site_id: site ID
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter; options
                    through     direct visit
                    search,0    all search engines
                    link        external link
        :param visitor: visitor filter; options
                    new         new visitors
                    old         returning visitors
        :return: decoded JSON response
        """
        return await self._fetch_report(
            'visit/world/a', self._REGION_METRICS, site_id, date_range,
            visitor=visitor, source=source)


async def main():
    """Demo: open a client, fetch the time trend report and print it."""
    async with BaiduStat() as stat:
        report = await stat.get_time_trend_report()
        print(report)


# Script entry point: run the demo coroutine on a fresh event loop.
if __name__ == '__main__':
    from asyncio import run

    run(main())

标签:count,Python,visitor,site,API,params,date,id,SDK
来源: https://www.cnblogs.com/lazyfish007/p/15389254.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有