前言
Pandas_Alive不仅包含动态条形图,还可以绘制动态曲线图、气泡图、饼状图、地图等。本文记录环境安装,数据获取,到最后生成动态gif全过程。
安装模块
安装支持库
# centos
yum -y install libjpeg-turbo-devel python3-devel ImageMagick
# mac
brew install ImageMagick
安装pandas_alive
pip3 install pandas_alive -i https://mirrors.aliyun.com/pypi/simple/
pip3 install attrs -i https://mirrors.aliyun.com/pypi/simple/
图表支持中文
获取字体并配置
#获取字体放入下面目录
cd /usr/local/lib64/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf
# 清理缓存
cd ~
rm -rf .cache/matplotlib
# 修改matplotlibrc
## 一个#号是配置,##是注释
vim /usr/local/lib64/python3.6/site-packages/matplotlib/mpl-data/matplotlibrc
## 257行
#font.sans-serif: SimHei, DejaVu Sans, Bitstream Vera Sans, Computer Modern Sans Serif, Lucida Grande, Verdana, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif
## 400行
#axes.unicode_minus: False
SimHei.ttf字体文件:
获取matplotlib缓存目录
import matplotlib
matplotlib.get_cachedir()
获取数据
从数据库导出数据
以累计订单按类型分类
bash export.sh 2022-03-01 2022-04-20 0
脚本内容
#!/bin/bash
# 获取指定日期区间内的某类型订单累计金额
start_day=$1
end_day=$2
ordertype=$3
declare -A dic
dic=([0]="未换装电子版" [5]="未换装打印版" [10]="已换装电子版" [15]="已换装打印版")
sql="SELECT
t1.date as date,
sum( t2.value ) as '${dic[$ordertype]}'
FROM (
select
date_format(create_time, '%Y-%m-%d') as date , sum(price)/100 as value
FROM
order_record
where
order_status = 10
and
order_type = ${ordertype}
and
create_time BETWEEN '${start_day}' AND '${end_day}'
GROUP BY
date
) t1
JOIN (
select date_format(create_time, '%Y-%m-%d') as date , sum(price)/100 as value
FROM
order_record
where
order_status = 10
and
order_type = ${ordertype}
and
create_time BETWEEN '${start_day}' AND '${end_day}'
GROUP BY
date
) t2
ON
t1.date >= t2.date
GROUP BY
t1.date
ORDER BY
date"
mysql -uroot -pxxxxx -h 127.0.0.1 kuming -e "${sql}" > ./result/${ordertype}.csv
# 将tab替换为,分割
sed -i 's/\t/,/g' ./result/${ordertype}.csv
数据合并
补充日期后,合并成一个csv文件
python3 manager_data.py
脚本内容:
import pandas as pd
from datetime import datetime, timedelta
import time
import os
from functools import reduce
def load_Data(r_file):
#加载数据
df0 = pd.read_csv(r_file)
return df0
#把datetime转成字符串
def datetime_toString(dt):
return dt.strftime("%Y-%m-%d")
#把字符串转成datetime
def string_toDatetime(string):
return datetime.strptime(string, "%Y-%m-%d")
#计算日期区间
def day_diff(day1, day2):
d1 = time.mktime(time.strptime(day1,'%Y-%m-%d'))
d2 = time.mktime(time.strptime(day2,'%Y-%m-%d'))
daysec = 24 * 60 * 60
return int(( d2 - d1 )/daysec)
#缺失值处理,插值替换
def data_Full(r_file):
df1 = load_Data(r_file) #加载数据
date0 = df1.iloc[0, 0] #初始时间
df1_date = df1['date'].tolist() #数据日期转为列表
df1_data = df1[df1.keys()[1]].tolist() #数据值转为列表
act = day_diff(df1_date[0], df1_date[-1]) #实际期望日期序列长度
for j in range(0, len(df1_date)):
if len(df1_date) < act:
while date0 != df1_date[j]: #如数据中日期列表与期望日期序列不相等,即存在缺失值执行while程序
nada = df1_data[j-1] #计算缺失处左右相邻插值
adda = [date0, nada]
date_da = pd.DataFrame(adda).T
date_da.columns = df1.columns
df1 = pd.concat([df1, date_da]) #将缺失日期加入数据列表中
date0 = datetime_toString(string_toDatetime(date0) + timedelta(days=1)) #日期加一
date0 = datetime_toString(string_toDatetime(date0) + timedelta(days=1)) #日期加一
df1 = df1.sort_values(by=['date'])
return df1
if __name__ == "__main__":
dirs='./data/result/'
files = os.listdir(dirs)
app=[]
for f in files:
r_file = os.path.join(dirs, f)
df = data_Full(r_file)
app.append(df)
df_final = reduce(lambda left, right: pd.merge(left, right, on='date', how='outer'), app)
df_final.to_csv('./data/t.csv',index=0,sep=',')
生成动态gif
生成水平条形图
python3 csv_to_gif.py
脚本内容
import pandas_alive
import pandas as pd
covid_df = pd.read_csv('data/t.csv', index_col=0, parse_dates=[0])
covid_df.plot_animated(filename='output/order.gif', n_visible=4)
柱状图
covid_df.plot_animated(filename='output/order.gif', orientation='v', n_visible=4)
曲线图
covid_df.diff().fillna(0).plot_animated(filename='output/order.gif', kind='line', period_label={'x': 0.25, 'y': 0.9})
这篇文章肯定会火,作者666大顺
1