time team scraping works
This commit is contained in:
76
boatmovers/scrapers.py
Normal file
76
boatmovers/scrapers.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from boatmovers.models import *
|
||||
import pandas as pd
|
||||
|
||||
# Placeholder URL, currently an empty string; it is not referenced by the
# code visible in this module.
# NOTE(review): presumably meant to hold a results-page URL -- confirm.
url_heineken = ''
|
||||
|
||||
def _split_athlete_name(value):
    """Turn one cell of the ``naam`` column into ``(first_name, last_name)``.

    A string is split on single spaces: the last word becomes the last name
    and everything before it the first name (a one-word name therefore has
    an empty first name). A missing cell (NaN/None) yields the placeholder
    ``('Unknown', 'Athlete')``. Any other non-string value is converted to
    text and used as the first name, with an empty last name.
    """
    try:
        parts = value.split(' ')
    except AttributeError:
        # Not a string. pandas reports empty cells as NaN; keep the
        # placeholder identity for those instead of storing the text "nan".
        if pd.isna(value):
            return 'Unknown', 'Athlete'
        return str(value), ''
    return ' '.join(parts[:-1]), parts[-1]


def time_team_scraper(url, raceid):
    """Import crews, athletes and results for one race from a results page.

    Downloads ``url`` and scans every ``<td>`` of the first ``<table>`` on
    the page. For each cell whose link contains ``'entry'`` it reads the
    linked page with ``pandas.read_html``, creates a ``Crew`` named after
    the link text, attaches the first ``race.crew_size`` people from the
    ``naam`` column (reusing an existing ``Athlete`` with the same first and
    last name when there is one) and stores a ``Result`` whose ``order`` is
    the 1-based position of the entry link on the page.

    Args:
        url: Address of the results page. It must contain the text
            ``results``; everything before the last occurrence is used as
            the base address for the entry links.
        raceid: Primary key of the ``Race`` the results belong to.

    Raises:
        AttributeError: If ``url`` does not contain ``results`` or the page
            has no ``<table>``.
        KeyError: If an entry page has no ``naam`` column or fewer rows than
            ``race.crew_size``.
    """
    race = Race.objects.get(id=raceid)
    crew_size = race.crew_size
    response = requests.get(url)
    soup = BeautifulSoup(response.content, features='lxml')
    table = soup.find('table')
    order = 1

    # Do not call this variable ``str``: shadowing the builtin previously
    # broke the ``str(...)`` conversion used when parsing athlete names.
    match = re.search('(.*)results(.*)', url)
    base = match.groups()[0]

    for row in table.findAll("tr"):
        for cell in row.findAll("td"):
            try:
                anchor = cell.find('a')
                link = anchor['href']
                name = anchor.contents[0]
            except (TypeError, IndexError):
                # No anchor in this cell, or an anchor without any content.
                link = ''
                name = ''

            if 'entry' not in link:
                continue

            print(order, name)

            # The first three characters of the href are dropped before it
            # is appended to the base address.
            # NOTE(review): presumably a leading '../' -- confirm.
            entry_tables = pd.read_html(base + link[3:])
            names_column = entry_tables[0]['naam']

            crew = Crew(name=name)
            crew.save()
            for i in range(crew_size):
                first_name, last_name = _split_athlete_name(names_column[i])

                athletes = Athlete.objects.filter(first_name=first_name,
                                                  last_name=last_name)
                if len(athletes) >= 1:
                    athlete = athletes[0]
                else:
                    # New athletes get the crew name as their club.
                    athlete = Athlete(first_name=first_name,
                                      last_name=last_name,
                                      club=name)
                    athlete.save()
                    print(athlete)

                crew.athletes.add(athlete)

            result = Result(
                crew=crew,
                race=race,
                order=order,
            )
            try:
                result.save()
            except ValidationError as e:
                # NOTE(review): ValidationError is not imported explicitly;
                # it is presumably provided by the star import from
                # boatmovers.models -- confirm.
                print(e)

            order += 1

            print('')
|
||||
Reference in New Issue
Block a user