Импортированный файл имеет неправильную кодировку: кодек 'charmap' не может декодировать байт 0x8d в позиции 4510: символ отображается в <undefined> (character maps to <undefined>) — ошибка в django import-export
Я пытаюсь импортировать данные из csv. Вот скриншот csv
Как вы можете видеть, я импортировал другой csv, и он был в полном порядке, но... Для этого csv он не работает. Я постоянно получаю ошибку. Я использую кодировку "utf8"
Вот мой код:
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd
from csv import writer
# Scrape the NJPW roster page, visit every linked profile, and append one CSV
# row per profile to real/njpw.csv.
url = "https://prowrestling.fandom.com/wiki/New_Japan_Pro_Wrestling/Roster"
page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")

# Every anchor inside a table cell of the roster page is followed.
# `a[href]` skips anchors that have no href attribute, which would otherwise
# raise KeyError on a["href"].
links = [
    "https://prowrestling.fandom.com/" + a["href"] for a in soup.select("td a[href]")
]

# CSV column name -> label text looked up in the profile's infobox.
FIELDS = {
    "height": "Height",
    "weight": "Weight",
    "born": "Born",
    "birth_place": "Birth Place",
    "trainer": "Trainer",
    "debut": "Debut",
    "resides": "Resides",
}


def _infobox_value(profile, label):
    """Return the stripped text of the <div> that directly follows the
    `.pi-data-label` element containing *label*, or "" when there is none."""
    cell = profile.select_one(f'.pi-data-label:-soup-contains("{label}") + div')
    return cell.text.strip() if cell is not None else ""


header = ["ring_name", *FIELDS]

# NOTE(review): the file is written as UTF-8, so whatever reads it must decode
# it as UTF-8 too. The reported 'charmap' error presumably comes from the
# importing side decoding with a legacy code page - confirm there.
with open("real/njpw.csv", "a", encoding="utf8", newline="") as f:
    wrt = writer(f)
    # The file is opened in append mode: write the header only when the file
    # is still empty, otherwise every run would insert another header row
    # in the middle of the data.
    if f.tell() == 0:
        wrt.writerow(header)
    for link in links:
        profile = BeautifulSoup(requests.get(link).content, "html.parser")
        # Some linked pages may have no <h2>; fall back to an empty name
        # instead of crashing with AttributeError half-way through the run.
        heading = profile.h2
        ring_name = heading.text.strip() if heading is not None else ""
        row = [ring_name, *(_infobox_value(profile, label) for label in FIELDS.values())]
        wrt.writerow(row)
        print(row)
Есть ли способ решить эту проблему?
Для сохранения данных в формате csv можно использовать встроенный метод pandas to_csv(), который очень прост.
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the NJPW roster page, visit every linked profile, and collect the
# infobox values into a pandas DataFrame.
url = "https://prowrestling.fandom.com/wiki/New_Japan_Pro_Wrestling/Roster"
page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")

# Every anchor inside a table cell of the roster page is followed.
# `a[href]` skips anchors that have no href attribute, which would otherwise
# raise KeyError on a["href"].
links = [
    "https://prowrestling.fandom.com/" + a["href"] for a in soup.select("td a[href]")
]

# DataFrame column name -> label text looked up in the profile's infobox.
FIELDS = {
    "height": "Height",
    "weight": "Weight",
    "born": "Born",
    "birth_place": "Birth Place",
    "trainer": "Trainer",
    "debut": "Debut",
    "resides": "Resides",
}

table = []
for link in links:
    profile = BeautifulSoup(requests.get(link).content, "html.parser")
    # Some linked pages may have no <h2>; fall back to an empty name instead
    # of crashing with AttributeError.
    heading = profile.h2
    record = {"ring_name": heading.text.strip() if heading is not None else ""}
    for column, label in FIELDS.items():
        # The value is the <div> directly following the matching label element.
        cell = profile.select_one(f'.pi-data-label:-soup-contains("{label}") + div')
        # Each column receives its own value. Previously "height" was filled
        # with the weight, "weight" was missing, and "resides" repeated the debut.
        record[column] = cell.text.strip() if cell is not None else ""
    table.append(record)

df = pd.DataFrame(table)
# To save the data locally, uncomment:
# df.to_csv('out.csv', index=False)
print(df)
Вывод:
ring_name height ... debut resides
0 Aaron Henare 231 lbs (105 kg) ... September 1, 2012 September 1, 2012
1 United Empire ... October 16, 2020 October 16, 2020
2 Bad Luck Fale 344 lbs (156 kg) ... April 4, 2010 April 4, 2010
3 Bullet Club ... May 3, 2013 May 3, 2013
4 Chase Owens 205 lbs (93 kg) ... February 17, 2007 February 17, 2007
.. ... ... ... ... ...
224 Yota Tsuji 200 lbs (91 kg) ... April 10, 2018 April 10, 2018
225 Yuji Nagata 238 lbs (108 kg) ... September 14, 1992 September 14, 1992
226 Yujiro Takahashi 198 lbs (90 kg) ... June 26, 2004 June 26, 2004
227 Yuya Uemura 180 lbs (82 kg) ... April 10, 2018 April 10, 2018
228 Zack Sabre, Jr. 180 lb (82 kg) ... 2002 2002
[229 rows x 7 columns]