python pandas相关知识点,练习

首先引入库文件,并进行数据读取

import pandas as pd
import numpy as np
# Load the base measurement data set (absolute path on the author's machine).
data_Base=pd.read_csv("D:\\Exam_Test\\unicomapp_r0_201904_jinan.csv")
# Optional second data set, left disabled:
#data_Ite=pd.read_csv("D:\\Exam_Test\\lte_cm_jinan.csv",encoding="gbk")
# Show (rows, columns); this expression used to be swallowed by the comment above.
data_Base.shape

显示行与列:

# Report the frame's dimensions: shape[0] is the row count, shape[1] the column count.
print(f"行数{data_Base.shape[0]},列数{data_Base.shape[1]}")

查看字段空值数量:

# Number of missing (null) values in each column.
data_Base.isnull().sum()

删除空值行:

# Drop every row whose "L-CELLID" is missing; inplace=True mutates data_Base itself.
data_Base.dropna(subset=["L-CELLID"],inplace=True)

每个 value 数值的采样点个数:

# Count how many samples fall on each "L-SINR" value, ordered by that value.
# The output columns are named explicitly ("index" = the SINR value,
# "L-SINR" = its sample count) because pandas >= 2.0 changed what a bare
# value_counts().reset_index() produces ("L-SINR"/"count"), while bar_base()
# reads Df_SINR["index"] and Df_SINR["L-SINR"].
Df_SINR=(
    data_Base["L-SINR"]
    .value_counts()
    .sort_index()
    .rename_axis("index")
    .reset_index(name="L-SINR")
)
Df_SINR

柱状图:

#SINR 样本分布柱状图
from example.commons import Faker
from pyecharts import options as opts
from pyecharts.charts import Bar
def bar_base() -> Bar:
    """Build the bar chart of the SINR sample distribution.

    Reads the module-level ``Df_SINR`` frame: its "index" column feeds the
    x axis and its "L-SINR" column feeds the single "SINR样本" series.
    """
    x_values = list(Df_SINR["index"])
    sample_counts = list(Df_SINR["L-SINR"])
    hidden_labels = opts.LabelOpts(is_show=False)  # no value label on each bar
    title = opts.TitleOpts(title="SINR 样本分布", pos_left="center")
    legend = opts.LegendOpts(is_show=True, pos_left="right")
    return (
        Bar()
        .add_xaxis(x_values)
        .add_yaxis("SINR样本", sample_counts, label_opts=hidden_labels)
        .set_global_opts(title_opts=title, legend_opts=legend)
    )


# Write the chart to a standalone HTML file.
bar_base().render("L_SINR 分布.html")

获取不等于某些值的行:

# Keep only the rows whose "L-SINR" is NOT in the given list (~ negates the isin mask).
# NOTE(review): the list holds the string "1"; if "L-SINR" is numeric this
# matches nothing and the filter is a no-op — confirm whether the number 1 was meant.
data_Base=data_Base[~data_Base["L-SINR"].isin(["1"])]
data_Base.shape

设置最大显示列:

# Let pandas display up to 3000 columns before truncating the output.
pd.set_option("display.max_columns",3000)

设置索引列:

# Index the frame by "RECTIME". The column is parsed to datetime first:
# read_csv is called without parse_dates, and resample() only works on a
# datetime-like index. assign() builds a new frame instead of writing into
# the (possibly filtered) existing one.
data_Base=data_Base.assign(RECTIME=pd.to_datetime(data_Base["RECTIME"])).set_index("RECTIME")

重采样:

# Daily ("D") mean of the numeric columns, with the time index turned back
# into a regular column. numeric_only=True is explicit because pandas >= 2.0
# raises on non-numeric columns instead of silently dropping them.
data_Apr=data_Base.resample("D").mean(numeric_only=True).reset_index()
data_Apr

根据指定列生成新的DataFrame:

# Build a new frame from data_Apr restricted to the three listed columns.
data_Apr_new=pd.DataFrame(data_Apr,columns=["RECTIME","L-RSRP","L-SINR"])

根据时间获取哪天(Day):

# Day-of-month of each "RECTIME" value (the .dt accessor requires a datetime column).
data_Apr_new["RECTIME"].dt.day

双Y轴,趋势图:

#使用 pyecharts 或其他可视化工具,将每天平均 RSRP 和平均 SINR 趋势作图
import pyecharts.options as opts
from example.commons import  Faker
from pyecharts.charts import Line


def line_base() -> Line:
    """Build a dual-y-axis line chart of the daily mean SINR and RSRP.

    Reads the module-level ``data_Apr_new`` frame ("RECTIME", "L-SINR" and
    "L-RSRP" columns). SINR is drawn against the primary y axis and RSRP
    against an additional y axis (index 1); each axis is bounded by the
    rounded min/max of its own series.

    Returns:
        The SINR chart with the RSRP chart overlapped onto it.
    """
    # Day labels are passed as strings: every y value is paired with its x
    # value, and ECharts reads a *number* on a category axis as a position
    # in the axis data rather than as a label, which would shift each point
    # by one day.
    days = [str(day) for day in data_Apr_new["RECTIME"].dt.day]

    sinr = data_Apr_new["L-SINR"]
    rsrp = data_Apr_new["L-RSRP"]
    # Hand pyecharts plain Python lists/floats rather than pandas/NumPy objects.
    sinr_points = sinr.round(2).tolist()
    rsrp_points = rsrp.round(2).tolist()
    # Series.min()/max() skip NaN (e.g. days without samples) for both axes,
    # so one empty day cannot turn an axis bound into NaN.
    sinr_min, sinr_max = round(float(sinr.min()), 2), round(float(sinr.max()), 2)
    rsrp_min, rsrp_max = round(float(rsrp.min()), 2), round(float(rsrp.max()), 2)

    c = (
        Line()
        .add_xaxis(days)
        .add_yaxis("SINR", sinr_points, is_smooth=True, is_symbol_show=False)
        .extend_axis(
            yaxis=opts.AxisOpts(name="RSRP", min_=rsrp_min, max_=rsrp_max)
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="4 月份济南 RSRP 和 SINR 趋势图", pos_left="center"),
            legend_opts=opts.LegendOpts(pos_left="right"),
            datazoom_opts=opts.DataZoomOpts(is_show=True),
            yaxis_opts=opts.AxisOpts(name="SINR", min_=sinr_min, max_=sinr_max),
        )
    )
    d = (
        Line()
        .add_xaxis(days)
        # yaxis_index=1 binds this series to the axis added by extend_axis above.
        .add_yaxis("RSRP", rsrp_points, yaxis_index=1, is_smooth=True, is_symbol_show=False)
    )
    return c.overlap(d)


line_base().render_notebook()

条件判断:

# Three row masks; a row is kept only when all of them hold.
condition1 = df_cm_new["样本量"].gt(100)                   # sample count above 100
condition2 = df_cm_new["RSRP>=-110 采样点占比"].gt(0.8)     # ratio above 0.8
condition3 = df_cm_new["SINR>0 采样点占比"].lt(0.7)         # ratio below 0.7
all_conditions = condition1 & condition2 & condition3
df_cm_new = df_cm_new[all_conditions]
df_cm_new.head()

发送邮件:

# Send an e-mail with one attachment per entry of City2
import os
import smtplib
from email.mime.text import MIMEText    # MIME (Multipurpose Internet Mail Extensions) is the Internet standard describing message content types; a MIME message can carry text, images, audio, video and other application-specific data.
from email.mime.multipart import MIMEMultipart
from email.header import Header
from email.mime.application import MIMEApplication
# Third-party SMTP service
mail_host="smtp.qq.com"  # SMTP server
mail_user="597945025@qq.com"    # login name
# The SMTP authorization code is a secret and must not be stored in source;
# it is read from the MAIL_PASS environment variable instead.
mail_pass=os.environ.get("MAIL_PASS","")
sender = '597945025@qq.com'
receivers = ['625645840@qq.com']  # recipient mailbox(es)
# Create a message that can carry attachments
message = MIMEMultipart()
message['From'] = Header(sender, 'utf-8')
# The header needs a comma-separated address list; str(receivers) would put
# the Python list repr (brackets and quotes) into the header.
message['To'] =  Header(", ".join(receivers), 'utf-8')
subject = 'mail test'
message['Subject'] = Header(subject, 'utf-8')

# Mail body
message.attach(MIMEText('这是邮箱测试,请查收', 'plain', 'utf-8'))
for city in City2:
    attachment_name = city+".xls"
    # "with" closes the file handle as soon as its bytes are read.
    with open(attachment_name, 'rb') as attachment_file:
        xlsxpart = MIMEApplication(attachment_file.read())
    xlsxpart.add_header('Content-Disposition', 'attachment', filename=('gbk', '', attachment_name))
    message.attach(xlsxpart)
try:
    # 25 is the SMTP port; the context manager sends QUIT and closes the
    # connection even when login or sendmail fails.
    with smtplib.SMTP(mail_host, 25) as smtpObj:
        smtpObj.login(mail_user,mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
        print ("邮件发送成功")
except (smtplib.SMTPException, OSError) as exc:
    # OSError covers connection-level failures (DNS, refused connection);
    # the exception is printed so the cause is not lost.
    print ("Error: 无法发送邮件", exc)

地图:

from pyecharts.charts import Map
from pyecharts.charts import Page
from pyecharts import options as opts
# "City2" values; paired with the ratio columns as the map's data items below.
city =df_last["City2"]
# Rounded min/max of each ratio column, used as the visual-map bounds.
val_min_rsrp,val_max_rsrp = df_last["RSRP>=-110 采样点占比"].min().round(2),df_last["RSRP>=-110 采样点占比"].max().round(2)
val_min_sinr,val_max_sinr =df_last["SINR>0 采样点占比"].min().round(2),df_last["SINR>0 采样点占比"].max().round(2)
# Colour list passed as range_color to the visual map in map_left().
visual_color = ['#df2f48','#dfa59b','#1c39ca','#80d327']
def map_left() -> Map:
    """Build the "济南" map coloured by the "RSRP>=-110 采样点占比" column.

    Reads the module-level ``city``, ``df_last``, ``val_min_rsrp``,
    ``val_max_rsrp`` and ``visual_color``.
    """
    # One [name, value] pair per row.
    region_values = [
        [name, ratio]
        for name, ratio in zip(city, df_last["RSRP>=-110 采样点占比"])
    ]
    title = opts.TitleOpts(title="济南各区县4G网络良好覆盖(RSRP>=-110)比例分布图", pos_left="center")
    visual_map = opts.VisualMapOpts(min_=val_min_rsrp, max_=val_max_rsrp, range_color=visual_color)
    tooltip = opts.TooltipOpts(formatter="{b}:{c} %")
    return (
        Map()
        .add("", region_values, "济南")
        .set_global_opts(title_opts=title, visualmap_opts=visual_map, tooltip_opts=tooltip)
    )
def map_right() -> Map:
    """Build the "济南" map coloured by the "SINR>0 采样点占比" column.

    Reads the module-level ``city``, ``df_last``, ``val_min_sinr`` and
    ``val_max_sinr``; the visual map keeps its default colours.
    """
    # One [name, value] pair per row.
    region_values = [
        [name, ratio]
        for name, ratio in zip(city, df_last["SINR>0 采样点占比"])
    ]
    title = opts.TitleOpts(title="济南各区县4G网络良好质量(SINR>0)比例分布图", pos_left="center")
    visual_map = opts.VisualMapOpts(min_=val_min_sinr, max_=val_max_sinr)
    tooltip = opts.TooltipOpts(formatter="{b}:{c} %")
    return (
        Map()
        .add("", region_values, "济南")
        .set_global_opts(title_opts=title, visualmap_opts=visual_map, tooltip_opts=tooltip)
    )

# Put both maps on one page and render it inline in the notebook.
page = Page(interval=0)
page.add(map_left(), map_right())
page.render_notebook()

对指定列进行处理(正则)

具体过程:首先将其转化成str,然后进行正则表达式匹配

# Cast "USER-ID" to text, then replace every value the pattern rejects with NaN.
# The builtin str is used: np.str was only an alias for it, deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
data_Base['USER-ID'] = data_Base['USER-ID'].astype(str)
import re
# NOTE(review): "^" binds only to the first alternative and "$" only to the
# second, so a value such as "1.5abc" still matches — confirm whether a
# full-string match was intended.
pattern = re.compile(r'^[-+]?[-0-9]\d*\.\d*|[-+]?\.?[0-9]\d*$')
data_Base["USER-ID"]=data_Base["USER-ID"].apply(lambda x:x if pattern.match(x) else np.nan)

zip转化指定格式:

将三列(名称,经度,纬度)转换为 {名称: [经度, 纬度]} 形式的字典:

# Earlier experiments with the same columns, kept for reference:
# # a=[list (z) for z in zip(grid_no["longitude"],grid_no["latitude"])]
# list( zip(grid_no["longitude"],grid_no["latitude"]) )
# list( grid_no["grid_no"])
# zip() takes iterables and packs their corresponding elements into tuples
# (returned as a list in Python 2, as an iterator in Python 3).
# Map every grid_no value to its [longitude, latitude] pair. This assignment
# must be real code, not part of a comment: json.dump(base, ...) needs `base`.
base = {
    inx: list(item)
    for inx, item in zip(
        list(grid_no['grid_no']),
        list(zip(grid_no['longitude'], grid_no['latitude'])),
    )
}

调用百度地图BMAP

# json is imported before its first use (json.dump below).
import json
from pyecharts.charts import BMap
# Persist `base` to ./data.json; bmap_base() loads this file through
# add_coordinate_json.
with open('./data.json', 'w') as outfile:
    json.dump(base, outfile)
# NOTE(review): hard-coded Baidu Maps API key — consider loading it from
# configuration instead of keeping it in the notes.
BAIDU_AK = "GbQ806nWqGFMjuiGjTm6jPgcVGWICGA1"
def bmap_base() -> BMap:
    """Build the Baidu-map effect-scatter chart of the grid values.

    Reads the module-level ``data`` frame ("grid_no" and
    "覆盖好质量差的质差样本占比" columns) and ``BAIDU_AK``; point coordinates
    are loaded from ./data.json.
    """
    # One [grid_no, value] pair per row.
    grid_values = [
        [grid, ratio]
        for grid, ratio in zip(data["grid_no"], data["覆盖好质量差的质差样本占比"])
    ]
    canvas = opts.InitOpts(height='615px', width='1350px')
    overview = opts.BMapOverviewMapControlOpts(is_open=True, offset_width=0, offset_height=0)
    title = opts.TitleOpts(title="济南覆盖好质量差SINR质差栅格分布图", pos_left='center')
    ripple = opts.EffectOpts(symbol='circle', scale=5, brush_type="stroke")
    return (
        BMap(init_opts=canvas)
        .add_schema(baidu_ak=BAIDU_AK, center=[117.064366, 36.646401], zoom=15)
        .add_coordinate_json(json_file='./data.json')
        .add(
            "",
            data_pair=grid_values,
            label_opts=opts.LabelOpts(is_show=False),
            symbol_size=6,
            type_='effectScatter',
        )
        .add_control_panel(
            navigation_control_opts=opts.BMapNavigationControlOpts(),
            scale_control_opts=opts.BMapScaleControlOpts(),
            overview_map_opts=overview,
        )
        .set_global_opts(title_opts=title)
        .set_series_opts(effect_opts=ripple)
    )


bmap = bmap_base()

# Write a standalone HTML file, then also show the chart inline in the notebook.
bmap.render("济南质差栅格分布图.html")
bmap.render_notebook()

堆叠柱状图

# Stacked bar chart: x axis = unique "City1" values, one stacked series per
# unique "prb_label" value.
city_names = list(prb_label_city_count["City1"].unique())
bar = Bar(init_opts=opts.InitOpts(height='350px'))
bar.add_xaxis(city_names)
# NOTE(review): each series is taken in row order, so this assumes every label
# has one row per city in the same order as the x axis — confirm upstream.
for label in prb_label_city_count["prb_label"].unique():
    is_label = prb_label_city_count["prb_label"] == label
    p = prb_label_city_count[is_label]
    percent = (p["prb_label_per"] * 100).round(2)  # ratio scaled by 100, 2 decimals
    bar.add_yaxis(label, list(percent), stack="stack1")  # same stack id => stacked

bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
percent_axis = opts.AxisOpts(
    axislabel_opts=opts.LabelOpts(formatter="{value} %"),
    max_=100,
)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="各地市PRB利用率分区间段分布图", pos_left="center"),
    legend_opts=opts.LegendOpts(pos_top="8%"),
    yaxis_opts=percent_axis,
    tooltip_opts=opts.TooltipOpts(formatter="{b}:{c} %"),
)
bar.render_notebook()

根据省信息标注市区地图:

# Append "市" to every "City1" value — presumably so the names match the
# region names of the "山东" map; confirm against the map data.
city = avg_traffic['City1'] + "市"
# Min/max of the plotted column, used as the visual-map bounds.
val_min,val_max = avg_traffic["Downlink traffic at the PDCP Layer"].min(),avg_traffic["Downlink traffic at the PDCP Layer"].max()
def map_shandong() -> Map:
    """Build the "山东" map coloured by "Downlink traffic at the PDCP Layer".

    Reads the module-level ``city``, ``avg_traffic``, ``val_min`` and
    ``val_max``; plotted values are rounded to two decimals.
    """
    traffic = avg_traffic["Downlink traffic at the PDCP Layer"].round(2)
    # One [name, value] pair per row.
    city_values = [[name, value] for name, value in zip(city, traffic)]
    title = opts.TitleOpts(title="2月份各地市平均单小区忙时业务量", pos_left="center")
    visual_map = opts.VisualMapOpts(min_=val_min, max_=val_max)
    return (
        Map()
        .add("", city_values, "山东")
        .set_global_opts(title_opts=title, visualmap_opts=visual_map)
    )


mymap = map_shandong()
# render() with no argument writes to the library's default output file.
mymap.render()
mymap.render_notebook()

根据特定关键字数据进行分段操作:

# Bucket "Average downlink PRB usage" into four labelled classes with edges
# 0 / 0.2 / 0.5 / 0.8 / 1; include_lowest=True makes the first interval include
# its left edge, so a usage of exactly 0 is labelled too.
newtable["prb_label"] = pd.cut(newtable["Average downlink PRB usage"],[0,0.2,0.5,0.8,1],labels=["低负荷","中等负荷","高负荷","超高负荷"],include_lowest=True)