打开jupyter
cmd
C:\Users\f>jupyter notebook
import pandas as pd
import numpy as np
import statsmodels.api as sm #Statsmodels库是Python中一个强大的统计分析库,包含假设检验、回归分析、时间序列分析等功能
from scipy.stats.stats import pearsonr
import matplotlib.pyplot as plt
# Load the order-detail workbook into a DataFrame (legacy .xls format).
date=pd.read_excel("F:/某公司销售数据-全国订单明细.xls")
print(date.head(10))
# Bare expressions such as the one below are only rendered when they are the
# last statement of a notebook cell; in a plain script they have no effect.
date.describe()
# Keep only the columns used below: order quantity, city, sales amount,
# order grade.
df=date[["订单数量","城市","销售额","订单等级"]]
df.head(10)
# Per-city totals. numeric_only=True keeps non-numeric columns out of the
# aggregate (presumably "订单等级" is text -- verify against the workbook).
# Since pandas 2.0 the default is numeric_only=False, which would concatenate
# text values per city and make the later df2.corr() call raise.
df2=df.groupby(["城市"]).sum(numeric_only=True)
df2
# Per-city sales amount (x axis) and order quantity (y axis).
px=df2["销售额"]
py=df2["订单数量"]
# Scatter plot of order quantity against sales amount, one point per city.
plt.figure(figsize=(10,5))
plt.scatter(px,py,color='b',label='data')
plt.xlabel('sale')
plt.ylabel('order')
plt.show()
# pearsonr returns (coefficient, p-value); only the coefficient is kept.
r=pearsonr(px,py)[0]
print(r)
# Correlation matrix restricted to the two numeric columns: since pandas 2.0
# DataFrame.corr() defaults to numeric_only=False and raises if df2 still
# carries a non-numeric column.
df2[["订单数量","销售额"]].corr()
# Ordinary least squares: order quantity regressed on sales amount.
# add_constant prepends the intercept column to the regressor.
X=df2["销售额"]
X=sm.add_constant(X)
model=sm.OLS(df2["订单数量"],X)
# From here on `model` is the fitted results object, not the unfitted model.
model=model.fit()
model.summary()
# 100 evenly spaced sales values across the observed range, given the same
# [const, sales] layout the model was fitted on.
x=np.linspace(df2["销售额"].min(),df2["销售额"].max(),100)
x=sm.add_constant(x)
y=model.predict(x)
plt.figure(figsize=(10,5))
plt.scatter(px,py,color='b',label='data')
plt.xlabel('sale')
plt.ylabel('order')
# Column 1 of x holds the sales values (column 0 is the constant).
plt.plot(x[:,1],y,'r',alpha=1) # alpha: opacity, 'r': red line
# Render this figure now, as the other plots in this script do; without it
# the figure is only drawn when a later section calls plt.show().
plt.show()
from scipy.stats import linregress
# Same regression as a simple linear fit via scipy, drawn with its fitted line.
plt.figure(figsize=(10,5))
plt.scatter(df2["销售额"],df2["订单数量"],color='r',label='data')
# linregress returns slope, intercept, correlation coefficient, p-value and
# the standard error of the slope.
slope,intercept,r,p,std_err=linregress(df2["销售额"],df2["订单数量"])
# Fitted order quantity for every observed sales value.
exp=df2["销售额"]*slope+intercept
# Draw the fitted line; it was computed but never plotted before.
plt.plot(df2["销售额"],exp,'b')
# The '+' format spec emits the intercept's sign, so a negative intercept
# renders as "...*x-3.2" rather than "...*x+-3.2".
plt.title(f'y={slope}*x{intercept:+}')
plt.xlabel('sale')
plt.ylabel('order')
# Only rotate the automatic tick labels. Passing the order quantities as
# labels for the sales positions would mislabel the x axis with y values.
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()
页面更新:2024-04-23
本站资料均由网友自行发布提供,仅用于学习交流。如有版权问题,请与我联系,QQ:4156828
© CopyRight 2008-2024 All Rights Reserved. Powered By bs178.com 闽ICP备11008920号-3
闽公网安备35020302034844号