技术资料
MySQL
Windows下安装MySQL 5.6
Python
Python3.5.2 安装(windows环境)
图片爬取和写入
gevent队列任务
selenium模拟浏览器操作
pandas表格和数据应用
OS文件创建
excel格式转换:csv转xls
email自动发送
excel读取指定多行数据
cookie登录后爬取内容
单页文字图片爬取保存到word
学习实践:知网疾病知识
学习实践:知网指南
字典生成树形目录
docx文本图片存入word
-
+
首页
pandas表格和数据应用
## 1.创建表和索引

```python
import pandas as pd
from pandas import Series, DataFrame

# Store each hero's basic information in a Series (one Series per row).
s1 = pd.Series([1001, '鲁班', '18', '150.00', '男'])
s2 = pd.Series([1002, '小乔', '19', '167.00', '女'])
s3 = pd.Series([1003, '关羽', '30', '180.00', '男'])
series_list = [s1, s2, s3]

# Build a DataFrame from the list of Series.
df = pd.DataFrame(series_list)
print(type(df), '\n', df, '\n')

# Series with the default integer index.
sel = Series(data=[1, 'TheShy', 20, '天,LP长夜'])
print(sel, '\n')

# Series with a custom index; index and data must have the same length.
sel = Series(data=[1, 'TheShy', 20, '天不生theshy'], index=['排名', 'ID号', '年龄', '评语'])
print(sel, '\n')

# Convert dictionaries to Series, then stack them into a DataFrame.
dic1 = {"姓名": '张三', "性别": '男', "年龄": 28, "电话": 9}
dic2 = {"姓名": '李四', "性别": '女', "年龄": 23, "电话2": "11"}
se1 = Series(data=dic1)
se2 = Series(data=dic2)
dd = pd.DataFrame([se1, se2], index=['A', 'B'])
print(dd)

# Build a DataFrame from a two-dimensional list of player records.
lol_list = [['上单', 'TheShy', 20],
            ['打野', '小天', 19],
            ['中单', 'Faker', 23],
            ['ADC', 'Uzi', 22],
            ['辅助', 'Ming', 21]]
df = DataFrame(data=lol_list, index=['a', 'b', 'c', 'd', 'e'], columns=['位置', 'ID号', '年龄'])
print(df, '\n')

# Build a DataFrame from a dictionary of columns.
dic = {
    '位置': ['上单', '打野', '中单', 'ADC', '辅助'],
    'ID号': ['TheShy', '小天', 'Faker', 'Uzi', 'Ming'],
    'year': [20, 19, 23, 22, 21]}
df = pd.DataFrame(dic, index=['A', 'b', 'c', 'd', 'e'])
print(df)
```

## 2.获取索引和内容

```python
from pandas import Series, DataFrame
import pandas as pd

emp = ['001', '002', '003', '004', '005']
name = [['亚瑟', '后裔', '小乔', '哪吒', '王昭君'],
        ['亚瑟', '后裔', '小乔', '哪吒', '王昭君']]
# Alternative one-dimensional example:
# name = ['亚瑟', '后裔', '小乔', '哪吒', '王昭君']
# series = Series(data=name, index=emp)
series = DataFrame(data=name, index=['A', 'B'], columns=emp)
# Other things to try:
# print(series.head(1), '\n', series.tail(1))
# for i in series:
#     print(i)
# for j in series.keys():
#     print(j)
# print(series[2], '\n', series[['001', '005']])
# print(series.ndim)
# print(series.index.tolist())

df_dict = {
    '姓名': ['ZhangSan', 'LiSi', 'WangWu', 'ZhaoLiu'],
    'age': ['18', '20', '19', '22'],
    'weight': ['50', '55', '60', '80']
}
df = pd.DataFrame(data=df_dict, index=['001', '002', '003', '004'])

# Iterate row by row.
for index, row_data in df.iterrows():
    print(row_data)
print('\n')

# Iterate column by column.
# DataFrame.iteritems() was removed in pandas 2.0; items() is the replacement.
for col, col_data in df.items():
    print(col)
print('\n')

# A single cell: one row, one column.
print(df.loc['001', '姓名'])
# Several columns of one row.
print(df.loc['001', ['姓名', 'weight']])
# All columns of one row.
print(df.loc['001', :])
# Non-adjacent rows and non-adjacent columns.
print(df.loc[['001', '003'], ['姓名', 'weight']])
# Consecutive rows and a consecutive range of columns
# (label slices include both endpoints).
print(df.loc['001':'003', '姓名':'weight'])
# One row by position.
print(df.iloc[1])
# Consecutive rows by position.
print(df.iloc[0:2])
# Non-adjacent rows by position.
print(df.iloc[[0, 2], :])
# One column by position.
print(df.iloc[:, 1])
# A single value by position.
print(df.iloc[1, 0])
```

## 3.定向查询

```python
from pandas import Series, DataFrame

# Build the DataFrame from a dictionary of Series sharing one index.
index_list = ['001', '002', '003', '004', '005', '006', '007', '008', '009', '010']
name_list = ['李白', '王昭君', '诸葛亮', '狄仁杰', '孙尚香', '妲己', '周瑜', '张飞', '王昭君', '大乔']
age_list = [25, 28, 27, 25, 30, 29, 25, 32, 28, 26]
salary_list = ['10k', '12.5k', '20k', '14k', '12k', '17k', '18k', '21k', '22k', '21.5k']
marital_list = ['NO', 'NO', 'YES', 'YES', 'NO', 'NO', 'NO', 'YES', 'NO', 'YES']
dic = {
    '姓名': Series(data=name_list, index=index_list),
    '年龄': Series(data=age_list, index=index_list),
    '薪资': Series(data=salary_list, index=index_list),
    '婚姻状况': Series(data=marital_list, index=index_list)
}
df = DataFrame(dic)

# 1. All information for employees 003 through 007.
result1 = df['003':'007']
print(result1)

# 2. Age and salary of every employee.
result2 = df.loc[:, ['年龄', '薪资']]
print(result2)

# 3. Marital status of one employee of interest.
result3 = df.loc['009', ['姓名', '年龄', '婚姻状况']]
print(result3, '\n')

# Method 1: iterate over the salary column directly.
for value in df['薪资']:
    print(value)

# Method 2: iterate over rows and pick the salary field.
for index, row_data in df.iterrows():
    print(row_data['薪资'])

# Method 3: iterate over columns and pick the salary column.
# DataFrame.iteritems() was removed in pandas 2.0; items() is the replacement.
for col, col_data in df.items():
    if col == '薪资':
        print(col_data)

# Highest salary: strip the trailing "k" and convert to float.
# rstrip('k') leaves a value untouched if the suffix is missing, whereas
# dropping the last character would corrupt it.
salaries = [float(value.rstrip('k')) for value in df['薪资']]
max_salary = max(salaries)
print(max_salary)
```

## 4.写入excel

```python
import pandas as pd
from pandas import Series, DataFrame

# Build the DataFrame from a dictionary of Series sharing one index.
index_list = ['001', '002', '003', '004', '005', '006', '007', '008', '009', '010']
name_list = ['李白', '王昭君', '张飞', '狄仁杰', '孙尚香', '妲己', '周瑜', '张飞', '王昭君', '大乔']
age_list = [25, 28, 27, 25, 30, 29, 25, 32, 28, 26]
salary_list = ['10k', '12.5k', '20k', '14k', '12k', '17k', '18k', '21k', '22k', '21.5k']
marital_list = ['NO', 'NO', 'YES', 'YES', 'NO', 'NO', 'NO', 'YES', 'NO', 'YES']
dic = {
    '姓名': Series(data=name_list, index=index_list),
    '年龄': Series(data=age_list, index=index_list),
    '薪资': Series(data=salary_list, index=index_list),
    '婚姻状况': Series(data=marital_list, index=index_list)
}
df = DataFrame(dic)

# Write to an Excel file; index=False leaves out the row index.
# to_excel() no longer accepts an `encoding` argument (removed in pandas 2.0).
# NOTE: writing .xlsx needs an Excel engine such as openpyxl to be installed.
df.to_excel('People_Information.xlsx', index=False)
print('end')

# Read the Excel data back; header=0 uses the first row as column names.
df = pd.read_excel('People_Information.xlsx', header=0)
print(df)
print(df.shape)
print(df['姓名'].tolist())

# Remove duplicate names with a set.
m = set(df['姓名'].tolist())
print(m)
```
大诚
2022年8月3日 10:21
转发文档
收藏文档
上一篇
下一篇
手机扫码
复制链接
手机扫一扫转发分享
复制链接
Markdown文件
PDF文档
PDF文档(打印)
分享
链接
类型
密码
更新密码