Imported file has a wrong encoding: 'charmap' codec can't decode byte 0x8d in position 4510: character maps to <undefined> (django-import-export)
I am trying to import data from a CSV file. Here is a screenshot of the CSV.
As you can see, I imported another CSV and it worked completely fine, but this CSV does not work: I get the error every time. I am writing the file with encoding "utf8".
Here is my code:
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd
from csv import writer
# Scrape the NJPW roster page, follow every profile link found in a table
# cell, and append one CSV row per profile to real/njpw.csv.
url = "https://prowrestling.fandom.com/wiki/New_Japan_Pro_Wrestling/Roster"
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')

# Every anchor inside a <td> of the roster page is treated as a profile link.
links = [
    "https://prowrestling.fandom.com/" + a["href"] for a in soup.select("td a")
]

header = ["ring_name", "height", "weight", "born", "birth_place", "trainer", "debut", "resides"]
# Infobox label texts, listed in the same order as the header columns that
# follow "ring_name".
labels = ["Height", "Weight", "Born", "Birth Place", "Trainer", "Debut", "Resides"]

with open("real/njpw.csv", 'a', encoding="utf8", newline="") as f:
    wrt = writer(f)
    # The file is opened in append mode, so the header is written only when
    # the file is still empty; otherwise every re-run would insert another
    # header line in the middle of the data.
    if f.tell() == 0:
        wrt.writerow(header)
    for link in links:
        soup = BeautifulSoup(requests.get(link).content, "html.parser")
        # The first <h2> on the profile page is used as the ring name.
        table = [soup.h2.text.strip()]
        for label in labels:
            # The value is the <div> immediately following the label element.
            cell = soup.select_one(f'.pi-data-label:-soup-contains("{label}") + div')
            # A label missing from the page becomes an empty CSV cell.
            table.append(cell.text.strip() if cell is not None else "")
        wrt.writerow(table)
        print(table)
Is there any way to solve this issue?
To store the data in CSV format, you can use pandas' built-in to_csv()
method, which is very easy.
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the NJPW roster page, follow every profile link found in a table
# cell, and collect one record per profile into a pandas DataFrame.
url = "https://prowrestling.fandom.com/wiki/New_Japan_Pro_Wrestling/Roster"
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')

# Every anchor inside a <td> of the roster page is treated as a profile link.
links = [
    "https://prowrestling.fandom.com/" + a["href"] for a in soup.select("td a")
]

# Output column name -> infobox label text on the profile page.
# Each column is driven by its own label, so a value cannot end up under the
# wrong key (previously 'height' received the weight, 'resides' received the
# debut date, and 'weight' was missing altogether).
fields = {
    'height': "Height",
    'weight': "Weight",
    'born': "Born",
    'birth_place': "Birth Place",
    'trainer': "Trainer",
    'debut': "Debut",
    'resides': "Resides",
}

table = []
for link in links:
    soup = BeautifulSoup(requests.get(link).content, "html.parser")
    # The first <h2> on the profile page is used as the ring name.
    row = {'ring_name': soup.h2.text.strip()}
    for column, label in fields.items():
        # The value is the <div> immediately following the label element.
        cell = soup.select_one(f'.pi-data-label:-soup-contains("{label}") + div')
        # A label missing from the page becomes an empty string.
        row[column] = cell.text.strip() if cell is not None else ""
    table.append(row)

df = pd.DataFrame(table)
# To save the data locally, uncomment the next line. It is kept as a separate
# statement because to_csv() returns None when given a path, so chaining it
# onto the assignment above would leave df set to None.
# df.to_csv('out.csv', index=False)
print(df)
Output:
ring_name height ... debut resides
0 Aaron Henare 231 lbs (105 kg) ... September 1, 2012 September 1, 2012
1 United Empire ... October 16, 2020 October 16, 2020
2 Bad Luck Fale 344 lbs (156 kg) ... April 4, 2010 April 4, 2010
3 Bullet Club ... May 3, 2013 May 3, 2013
4 Chase Owens 205 lbs (93 kg) ... February 17, 2007 February 17, 2007
.. ... ... ... ... ...
224 Yota Tsuji 200 lbs (91 kg) ... April 10, 2018 April 10, 2018
225 Yuji Nagata 238 lbs (108 kg) ... September 14, 1992 September 14, 1992
226 Yujiro Takahashi 198 lbs (90 kg) ... June 26, 2004 June 26, 2004
227 Yuya Uemura 180 lbs (82 kg) ... April 10, 2018 April 10, 2018
228 Zack Sabre, Jr. 180 lb (82 kg) ... 2002 2002
[229 rows x 7 columns]