Private
Public Access
1
0

time team scraping works

This commit is contained in:
Sander Roosendaal
2022-06-25 16:07:11 +02:00
parent 0b88f3c861
commit 44d3dfc919
8 changed files with 172 additions and 8 deletions

View File

@@ -0,0 +1,33 @@
# Generated by Django 3.2.12 on 2022-06-25 13:28
import boatmovers.models
from django.db import migrations, models
class Migration(migrations.Migration):
    # Schema migration for the ``boatmovers`` app: alters three existing
    # fields (crew.athletes, race.date, race.resulturl) and sets the
    # unique-together constraint on ``result``.

    # Applied after migration 0011 of the same app.
    dependencies = [
        ('boatmovers', '0011_alter_race_processed'),
    ]

    # Operations are applied in the order listed.
    operations = [
        # crew.athletes: many-to-many to Athlete, reverse accessor named 'crews'.
        migrations.AlterField(
            model_name='crew',
            name='athletes',
            field=models.ManyToManyField(related_name='crews', to='boatmovers.Athlete'),
        ),
        # race.date: DateField whose default is the callable
        # boatmovers.models.current_day.
        migrations.AlterField(
            model_name='race',
            name='date',
            field=models.DateField(default=boatmovers.models.current_day, verbose_name='Race Date'),
        ),
        # race.resulturl: URLField that may be NULL in the database.
        migrations.AlterField(
            model_name='race',
            name='resulturl',
            field=models.URLField(null=True, verbose_name='URL Link to results'),
        ),
        # result: the (crew, order) pair must be unique.
        migrations.AlterUniqueTogether(
            name='result',
            unique_together={('crew', 'order')},
        ),
    ]

View File

@@ -0,0 +1,18 @@
# Generated by Django 3.2.12 on 2022-06-25 13:31
from django.db import migrations, models
class Migration(migrations.Migration):
    # Schema migration for the ``boatmovers`` app: changes the reverse
    # accessor name of the crew.athletes many-to-many field.

    # Applied after migration 0012 of the same app.
    dependencies = [
        ('boatmovers', '0012_auto_20220625_1328'),
    ]

    operations = [
        # crew.athletes: reverse accessor on Athlete is now 'athlete_crews'.
        migrations.AlterField(
            model_name='crew',
            name='athletes',
            field=models.ManyToManyField(related_name='athlete_crews', to='boatmovers.Athlete'),
        ),
    ]

View File

@@ -2,6 +2,8 @@ from django.db import models
from django import forms from django import forms
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
import collections
import datetime import datetime
from django.utils import timezone from django.utils import timezone
import trueskill import trueskill
@@ -43,7 +45,7 @@ class athleteForm(forms.ModelForm):
fields = ['first_name','last_name','club','birth_year'] fields = ['first_name','last_name','club','birth_year']
class Crew(models.Model): class Crew(models.Model):
athletes = models.ManyToManyField(Athlete) athletes = models.ManyToManyField(Athlete, related_name='athlete_crews')
name = models.CharField(max_length=200) name = models.CharField(max_length=200)
def __str__(self): def __str__(self):
@@ -90,10 +92,10 @@ class Race(models.Model):
"Cannot have the same crew more than one time in a race" "Cannot have the same crew more than one time in a race"
) )
if len(athletes) != len(set(athletes)): #if len(athletes) != len(set(athletes)):
raise ValidationError( # raise ValidationError(
"Cannot have the same athlete in different crews in a race" # "Cannot have the same athlete in different crews in a race"
) # )
super(Race, self).save(*args, **kwargs) super(Race, self).save(*args, **kwargs)
@@ -208,6 +210,7 @@ class Result(models.Model):
"Cannot have the same crew more than one time in a race" "Cannot have the same crew more than one time in a race"
) )
if len(athletes) != len(set(athletes)): if len(athletes) != len(set(athletes)):
print([item for item, count in collections.Counter(athletes).items() if count>1])
raise ValidationError( raise ValidationError(
"Cannot have the same athlete in different crews in a race" "Cannot have the same athlete in different crews in a race"
) )

76
boatmovers/scrapers.py Normal file
View File

@@ -0,0 +1,76 @@
import re
from bs4 import BeautifulSoup
import requests
from boatmovers.models import *
import pandas as pd
url_heineken = ''
def _split_athlete_name(raw):
    """Turn one cell of the scraped 'naam' column into (first_name, last_name).

    * A string is split on single spaces: the last token is the last name,
      everything before it is the first name.
    * A missing cell (NaN/None) yields the placeholder ('Unknown', 'Athlete').
    * Any other value is stringified and used as first name, with an empty
      last name.
    """
    if isinstance(raw, str):
        parts = raw.split(' ')
        return ' '.join(parts[:-1]), parts[-1]
    if pd.isna(raw):
        return 'Unknown', 'Athlete'
    return str(raw), ''


def time_team_scraper(url,raceid):
    """Scrape a Time-Team results page and store crews, athletes and results.

    For every table cell on ``url`` whose link contains 'entry', the linked
    entry page is read with pandas, a ``Crew`` is created, the first
    ``race.crew_size`` rows of its 'naam' column are matched to existing
    ``Athlete`` rows (by first and last name) or created, and a ``Result``
    with a running ``order`` (starting at 1) is saved for the race.

    Args:
        url: Address of the results page; must contain the text 'results',
            because everything before it is used as prefix for entry links.
        raceid: Primary key of the ``Race`` the results belong to.

    Raises:
        AttributeError: if ``url`` does not contain 'results' or the page
            has no ``<table>``.
        KeyError: if an entry table has no 'naam' column or fewer rows than
            ``race.crew_size``.

    A ``ValidationError`` raised while saving a ``Result`` is printed and
    scraping continues with the next crew.
    """
    race = Race.objects.get(id=raceid)
    crew_size = race.crew_size
    response = requests.get(url)
    soup = BeautifulSoup(response.content, features='lxml')
    table = soup.find('table')
    order = 1

    # Do not call this variable ``str``: that shadowed the builtin and broke
    # the stringify fallback for non-text name cells further down.
    url_match = re.search('(.*)results(.*)', url)
    base = url_match.groups()[0]

    for row in table.findAll("tr"):
        for cell in row.findAll("td"):
            anchor = cell.find('a')
            # Cells without a link, or with an empty link, carry no entry.
            if anchor is None or not anchor.contents:
                continue
            link = anchor.get('href') or ''
            if 'entry' not in link:
                continue
            name = anchor.contents[0]

            print(order, name)
            # Entry links are relative ('../entry/...'); drop the leading
            # three characters and resolve against the results base URL.
            entry_table = pd.read_html(base + link[3:])[0]
            name_column = entry_table['naam']

            crew = Crew(name=name)
            crew.save()

            for i in range(crew_size):
                first_name, last_name = _split_athlete_name(name_column[i])
                athlete = Athlete.objects.filter(
                    first_name=first_name,
                    last_name=last_name,
                ).first()
                if athlete is None:
                    # Unknown athlete: create one, using the crew name as club.
                    athlete = Athlete(
                        first_name=first_name,
                        last_name=last_name,
                        club=name,
                    )
                    athlete.save()
                print(athlete)
                crew.athletes.add(athlete)

            result = Result(
                crew=crew,
                race=race,
                order=order,
            )
            try:
                result.save()
            except ValidationError as e:
                # Best effort: report the rejected result and keep scraping.
                print(e)
            order += 1
            print('')

View File

@@ -18,6 +18,9 @@
<td>{{ athlete.last_name }}</td> <td>{{ athlete.last_name }}</td>
<td>{{ athlete.club }}</td> <td>{{ athlete.club }}</td>
<td>{{ athlete.gender }}</td> <td>{{ athlete.gender }}</td>
{% if user.is_authenticated and user.is_staff %}
<td><a href="/admin/boatmovers/athlete/{{ athlete.id }}/change/">edit</a></td>
{% endif %}
</tr> </tr>
{% endfor %} {% endfor %}
</table> </table>

View File

@@ -14,6 +14,11 @@
<tr> <tr>
<td>{{ result.order }}</td> <td>{{ result.order }}</td>
<td><a href="/boatmovers/crew/{{ result.crew.id }}/">{{ result.crew.name }}</a></td> <td><a href="/boatmovers/crew/{{ result.crew.id }}/">{{ result.crew.name }}</a></td>
{% if result.crew.id in duplicate_crews %}
<td>!</td>
{% elif result.crew.id in duplicate_athletes_crews %}
<td>!</td>
{% endif %}
</tr> </tr>
{% endfor %} {% endfor %}
</table> </table>

View File

@@ -3,6 +3,8 @@ from django.http import HttpResponse, HttpResponseRedirect
from django.shortcuts import get_object_or_404 from django.shortcuts import get_object_or_404
from django.urls import reverse from django.urls import reverse
import collections
# Create your views here. # Create your views here.
from django.views.generic.edit import CreateView from django.views.generic.edit import CreateView
from boatmovers.models import Athlete, Crew, Race, Result from boatmovers.models import Athlete, Crew, Race, Result
@@ -50,7 +52,7 @@ class ResultCreateView(CreateView):
success_url = '/boatmovers/' success_url = '/boatmovers/'
def boatmovers_view(request): def boatmovers_view(request):
athletes = Athlete.objects.all().order_by('-trueskill_exposed','-birth_year','last_name','first_name') athletes = Athlete.objects.filter(trueskill_exposed__gt=0).order_by('-trueskill_exposed','-birth_year','last_name','first_name')
races = Race.objects.filter(verified=True,processed=True).order_by('-date') races = Race.objects.filter(verified=True,processed=True).order_by('-date')
new_races = Race.objects.filter(processed=False).order_by('date') new_races = Race.objects.filter(processed=False).order_by('date')
@@ -66,10 +68,34 @@ def race_view(request,id=0):
race = get_object_or_404(Race, pk=id) race = get_object_or_404(Race, pk=id)
results = race.results.all().order_by('order') results = race.results.all().order_by('order')
crews = []
athletes = []
for result in results:
crews.append(result.crew.id)
for athlete in result.crew.athletes.all():
athletes.append(athlete.id)
# duplicates
duplicate_athletes = [item for item, count in collections.Counter(athletes).items() if count>1]
duplicate_crews = [item for item, count in collections.Counter(crews).items() if count>1]
duplicate_athletes_crews = []
for athlete_id in duplicate_athletes:
athlete = Athlete.objects.get(id=athlete_id)
crews = [crew.id for crew in athlete.athlete_crews.all()]
for crew in crews:
duplicate_athletes_crews.append(crew)
return render(request, return render(request,
'race.html', 'race.html',
{'race':race, {
'results':results} 'race':race,
'results':results,
'duplicate_athletes':duplicate_athletes,
'duplicate_crews':duplicate_crews,
'duplicate_athletes_crews':duplicate_athletes_crews
}
) )
def race_verify(request, id=0): def race_verify(request, id=0):

BIN
db.sqlite3-journal Normal file

Binary file not shown.