141 lines
4.5 KiB
Python
141 lines
4.5 KiB
Python
import re
|
|
from bs4 import BeautifulSoup
|
|
import requests
|
|
from boatmovers.models import *
|
|
import pandas as pd
|
|
from django.core.exceptions import ValidationError
|
|
|
|
# URL placeholder, currently an empty string; nothing in the visible part of
# this file reads it.
url_heineken = ''
|
|
|
|
|
|
def _split_athlete_name(raw_name):
    """Turn one raw name cell into a ``(first_name, last_name)`` pair.

    * A string is split on single spaces: the last token is the last name and
      everything before it is the first name (a one-word name therefore ends
      up entirely in ``last_name``).
    * A missing value (``None``/NaN) becomes the placeholder
      ``('Unknown', 'Athlete')`` instead of an athlete literally named "nan".
    * Any other value (e.g. a number) is stringified into ``first_name`` with
      an empty ``last_name``.
    """
    if isinstance(raw_name, str):
        parts = raw_name.split(' ')
        return ' '.join(parts[:-1]), parts[-1]
    if raw_name is None or pd.isna(raw_name):
        return 'Unknown', 'Athlete'
    return str(raw_name), ''


def _find_or_create_athlete(first_name, last_name, club, gender):
    """Return the stored Athlete matching name and gender, creating it if needed.

    Raises:
        ValidationError: re-raised from ``Athlete.save()`` when its message
            does not contain a ``:`` separated id to fall back on.
    """
    matches = Athlete.objects.filter(first_name=first_name,
                                     last_name=last_name,
                                     gender=gender)
    if matches:
        return matches[0]

    athlete = Athlete(first_name=first_name,
                      last_name=last_name,
                      club=club,
                      gender=gender)
    try:
        athlete.save()
    except ValidationError as exc:
        # NOTE(review): the message is assumed to look like "<text>:<id>",
        # where <id> is the primary key of an already stored athlete --
        # confirm against Athlete.save()/clean() in boatmovers.models.
        message = str(getattr(exc, 'message', ''))
        _, separator, existing_id = message.rpartition(':')
        if not separator:
            # No id to recover from: surface the validation error itself
            # rather than an unrelated unpacking error.
            raise
        athlete = Athlete.objects.get(id=existing_id.strip())
    return athlete


def _store_crew_result(crew_name, raw_names, race, order, gender):
    """Save one crew, attach its athletes and record its Result in ``race``.

    ``raw_names`` holds one raw name cell per seat. A ValidationError raised
    while saving the Result is printed and otherwise ignored (best effort).
    """
    crew = Crew(name=crew_name)
    crew.save()

    for raw_name in raw_names:
        first_name, last_name = _split_athlete_name(raw_name)
        athlete = _find_or_create_athlete(first_name, last_name,
                                          crew_name, gender)
        print(athlete)
        crew.athletes.add(athlete)

    result = Result(crew=crew, race=race, order=order)
    try:
        result.save()
    except ValidationError as exc:
        print(exc)


def csv_reader(filename, raceid, clubcol='Ploeg', bankjes=('Slag',), uitslag='Pos', gender='m'):
    """Import the results of one race from a CSV file.

    Every CSV row becomes a Crew with ``race.crew_size`` athletes and a Result
    linking that crew to the race.

    Args:
        filename: path or buffer accepted by ``pandas.read_csv``.
        raceid: primary key of the Race the results belong to.
        clubcol: name of the column holding the crew/club name.
        bankjes: sequence of column names holding the athlete names, one
            column per seat; only the first ``race.crew_size`` are used.
        uitslag: name of the column holding the finishing position.
        gender: gender value used to look up and create athletes.

    Raises:
        IndexError: if ``bankjes`` has fewer entries than ``race.crew_size``.
        KeyError: if one of the named columns is missing from the CSV.
    """
    race = Race.objects.get(id=raceid)
    nr = race.crew_size

    if len(bankjes) < nr:
        # Fail before anything is written instead of halfway through a crew.
        raise IndexError(
            'race {} has crew size {} but only {} seat column(s) were given'
            .format(raceid, nr, len(bankjes)))

    df = pd.read_csv(filename)

    # Column positions do not change per row, so resolve them once; this also
    # reports a missing column before any crew has been saved.
    order_pos = df.columns.get_loc(uitslag)
    club_pos = df.columns.get_loc(clubcol)
    seat_positions = [df.columns.get_loc(column) for column in bankjes[:nr]]

    # index=False/name=None yields plain value tuples, so the positions above
    # can be used directly.
    for row in df.itertuples(index=False, name=None):
        _store_crew_result(
            crew_name=row[club_pos],
            raw_names=[row[pos] for pos in seat_positions],
            race=race,
            order=row[order_pos],
            gender=gender,
        )


def time_team_scraper(url, raceid, gender='m'):
    """Import the results of one race from an HTML results page.

    Crews are numbered 1, 2, 3, ... in the order their entry links appear in
    the first table of the page. For each crew the linked entry page is read
    with ``pandas.read_html`` and the first ``race.crew_size`` values of its
    ``naam`` column are stored as the crew's athletes.

    Args:
        url: address of the results page; it must contain the text
            ``results``, because everything before its last occurrence is
            used as the base for the entry links.
        raceid: primary key of the Race the results belong to.
        gender: gender value used to look up and create athletes.

    Raises:
        ValueError: if ``url`` does not contain ``results`` or the page has
            no table.
    """
    race = Race.objects.get(id=raceid)
    nr = race.crew_size

    base, marker, _ = url.rpartition('results')
    if not marker:
        raise ValueError("url must contain 'results': {!r}".format(url))

    r = requests.get(url)
    soup = BeautifulSoup(r.content, features='lxml')
    tbl = soup.find('table')
    if tbl is None:
        raise ValueError('no results table found at {!r}'.format(url))

    order = 1
    for tr in tbl.find_all('tr'):
        for cell in tr.find_all('td'):
            anchor = cell.find('a')
            if anchor is None or not anchor.contents:
                continue
            link = anchor.get('href') or ''
            if 'entry' not in link:
                continue

            name = anchor.contents[0]
            print(order, name)

            # The first three characters of the href are dropped before it is
            # appended to the base (presumably a leading '../' -- confirm).
            entry_table = pd.read_html(base + link[3:])[0]
            namen = entry_table['naam']

            _store_crew_result(
                crew_name=name,
                raw_names=[namen[i] for i in range(nr)],
                race=race,
                order=order,
                gender=gender,
            )

            order += 1

            print('')
|