Fork me on GitHub

CSV

CSV文件如何读写

写出这篇文章的原因主要是最近在看一本书《Python爬虫开发与实战-从入门到实战》里面提到了CSV这个模块,我立马进行了尝试,发现非常好用,比之前的xlwt好用多了。

关键是爬取到数据之后,整个存储数据的逻辑更容易理解(可能还是自己太菜吧😭)。本文介绍如何通过 pandas 和 csv 这两个模块对数据进行读写操作。

写入文件

pandas写入

1
2
3
4
5
6
7
8
9
10
11
12
# 1 - Writing with pandas: build a DataFrame from a list of row dicts.

import pandas as pd

data = [
    {"name": "yangming", "age": 32, "height": 180, "address": "shenzhen"},
    {"name": "xiaoming", "age": 24, "height": 168, "address": "guangzhou"},
    {"name": "zhoujun", "age": 29, "height": 184, "address": "shanghai"},
    {"name": "zhangshan", "age": 20, "height": 170, "address": "changsha"},
]

# Each dict becomes one row; the dict keys become the column names.
df = pd.DataFrame(data)
df  # notebook-style echo of the frame
name age height address
0 yangming 32 180 shenzhen
1 xiaoming 24 168 guangzhou
2 zhoujun 29 184 shanghai
3 zhangshan 20 170 changsha
1
2
3
4
# Save the DataFrame as CSV. `index` controls whether the row labels are
# written as an extra first column; True is the default and is spelled out
# here so the call can be compared with the index=False variant.
df.to_csv("tocsvfile-pandas.csv", sep=",", index=True)
pd.read_csv("tocsvfile-pandas.csv")
Unnamed: 0 name age height address
0 0 yangming 32 180 shenzhen
1 1 xiaoming 24 168 guangzhou
2 2 zhoujun 29 184 shanghai
3 3 zhangshan 20 170 changsha
1
2
3
4
# How to read the `index` parameter: with index=False the row labels are not
# written, so reading the file back yields only the four data columns.
df.to_csv("tocsvfile-pandas-1.csv", sep=",", index=False)
pd.read_csv("tocsvfile-pandas-1.csv")
name age height address
0 yangming 32 180 shenzhen
1 xiaoming 24 168 guangzhou
2 zhoujun 29 184 shanghai
3 zhangshan 20 170 changsha

csv写入

字典形式写入
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
### Store rows with the csv module, passing each record as a dictionary

import csv

data = [
    {"name": "yangming", "age": 32, "height": 180, "address": "shenzhen"},
    {"name": "xiaoming", "age": 24, "height": 168, "address": "guangzhou"},
    {"name": "zhoujun", "age": 29, "height": 184, "address": "shanghai"},
    {"name": "zhangshan", "age": 20, "height": 170, "address": "changsha"},
]

# newline="" lets the csv writer emit its own line endings untouched; without
# it, platforms that translate "\n" end up with a blank line after every row.
with open("information.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "age", "height", "address"])
    writer.writeheader()
    # Write the whole list of records in one call -> writerows
    writer.writerows(data)
    # Write a single record -> writerow
    writer.writerow(
        {"name": "Peter", "age": 28, "height": 176, "address": "shenzhen"}
    )
1
2
3
4
# Read the file written by the csv module back with pandas to check its content.
import pandas as pd

# Note: this rebinds `data`, which previously held the list of row dicts.
data = pd.read_csv("information.csv")
data
name age height address
0 yangming 32 180 shenzhen
1 xiaoming 24 168 guangzhou
2 zhoujun 29 184 shanghai
3 zhangshan 20 170 changsha
4 Peter 28 176 shenzhen
列表形式写入
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Open the file (and write the header) once, before the loop, so the header
# row is not written again on every iteration.
# Example referenced by the author: "简书APP之旅" (Jianshu APP journey).

with open("information-1.csv", "a", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "age", "height", "address"])
    # The file is opened in append mode, so only emit the header while the
    # file is still empty; otherwise re-running this snippet would insert a
    # second header row in the middle of the data.
    if f.tell() == 0:
        writer.writeheader()

    for i in range(1, 5):
        # Repeat the four sample records i times.
        name_list = ["xiaoming", "yanghong", "peter", "Tom"] * i
        age_list = [19, 27, 32, 24] * i
        height_list = [176, 180, 172, 183] * i
        address_list = ["shenzhen", "guangzhou", "shanghai", "changsha"] * i

        # zip walks the four parallel lists in lockstep, one record per step.
        information_list = [
            {"name": name, "age": age, "height": height, "address": address}
            for name, age, height, address in zip(
                name_list, age_list, height_list, address_list
            )
        ]

    # NOTE(review): the indentation of this call was lost in the source. It is
    # placed after the loop because the output shown for this file has 16 data
    # rows, i.e. only the list built in the last iteration (i == 4) is written.
    # Confirm against the original notebook.
    writer.writerows(information_list)

# Read the data back
import pandas as pd

data = pd.read_csv("information-1.csv")
data
name age height address
0 xiaoming 19 176 shenzhen
1 yanghong 27 180 guangzhou
2 peter 32 172 shanghai
3 Tom 24 183 changsha
4 xiaoming 19 176 shenzhen
5 yanghong 27 180 guangzhou
6 peter 32 172 shanghai
7 Tom 24 183 changsha
8 xiaoming 19 176 shenzhen
9 yanghong 27 180 guangzhou
10 peter 32 172 shanghai
11 Tom 24 183 changsha
12 xiaoming 19 176 shenzhen
13 yanghong 27 180 guangzhou
14 peter 32 172 shanghai
15 Tom 24 183 changsha
单行写入
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import csv

# 1 - Column header
fileHeader = ["name", "score"]

# 2 - The three data rows to write
d1 = ["Wang", "100"]
d2 = ["Li", "80"]
d3 = ["xiaosi", "92"]

# 3 - Open the file and write the data.
# newline="" lets the csv writer emit its own line endings untouched; without
# it, platforms that translate "\n" end up with a blank line after every row.
f = open("instance_1.csv", "w", newline="", encoding="utf-8")
try:
    writer = csv.writer(f)  # create the writer object

    # Every row is handed to the writer as a list.
    # Alternative: writer.writerows([fileHeader, d1, d2, d3]) writes the same
    # four rows in a single call.

    # 4 - Pass the rows one at a time
    writer.writerow(fileHeader)
    writer.writerow(d1)
    writer.writerow(d2)
    writer.writerow(d3)
finally:
    # 5 - Without a with-statement the file must be closed explicitly;
    # doing it in `finally` closes it even when one of the writes fails.
    f.close()
1
# Read the file back with pandas to check what was written.
# `pd` is not imported in this snippet; it relies on an earlier `import pandas as pd`.
pd.read_csv("instance_1.csv")
name score
0 Wang 100
1 Li 80
2 xiaosi 92
上下文写入-with
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import csv

# Header row
fileHeader = ["name", "score"]

# The three data rows to write
d1 = ["Wang", "100"]
d2 = ["Li", "80"]
d3 = ["xiaosi", "92"]

# Write the data. The with-statement closes the file automatically, so no
# explicit close() call is needed.
# newline="" lets the csv writer emit its own line endings untouched; without
# it, platforms that translate "\n" end up with a blank line after every row.
with open("instance_2.csv", "a", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    # Append mode keeps whatever is already in the file, so the header is only
    # written while the file is still empty; otherwise every re-run would add
    # another header row in the middle of the data.
    if f.tell() == 0:
        writer.writerow(fileHeader)
    writer.writerows([d1, d2, d3])

# `pd` is not imported in this snippet; it relies on an earlier `import pandas as pd`.
pd.read_csv("instance_2.csv")
name score
0 Wang 100
1 Li 80
2 xiaosi 92

读文件

pandas读取文件

1
2
3
4
5
6
# 1 - Read the file with pandas

import pandas as pd

# The first line of the file is used as the column names, as the output shows.
csvfile = pd.read_csv("information.csv")
csvfile
name age height address
0 yangming 32 180 shenzhen
1 xiaoming 24 168 guangzhou
2 zhoujun 29 184 shanghai
3 zhangshan 20 170 changsha
4 Peter 28 176 shenzhen

csv模块读取文件

1
2
3
4
5
6
7
8
# 2 - Read the file with the csv module

import csv

# newline="" is what the csv module asks for when opening files, so that
# newlines embedded in quoted fields are handled by the reader itself.
# encoding="utf-8" matches the encoding used when the file was written.
with open("information-1.csv", newline="", encoding="utf-8") as f:
    csvfile = csv.reader(f)
    # The reader is iterable and yields each row as a list of strings,
    # so no readlines() call is needed.
    for line in csvfile:
        print(line)
['name', 'age', 'height', 'address']
['xiaoming', '19', '176', 'shenzhen']
['yanghong', '27', '180', 'guangzhou']
['peter', '32', '172', 'shanghai']
['Tom', '24', '183', 'changsha']
['xiaoming', '19', '176', 'shenzhen']
['yanghong', '27', '180', 'guangzhou']
['peter', '32', '172', 'shanghai']
['Tom', '24', '183', 'changsha']
['xiaoming', '19', '176', 'shenzhen']
['yanghong', '27', '180', 'guangzhou']
['peter', '32', '172', 'shanghai']
['Tom', '24', '183', 'changsha']
['xiaoming', '19', '176', 'shenzhen']
['yanghong', '27', '180', 'guangzhou']
['peter', '32', '172', 'shanghai']
['Tom', '24', '183', 'changsha']

本文标题:CSV

发布时间:2020年07月19日 - 13:07

原始链接:http://www.renpeter.cn/2020/07/19/CSV.html

许可协议: 署名-非商业性使用-禁止演绎 4.0 国际 转载请保留原文链接及作者。

Coffee or Tea