Private
Public Access
1
0

time team scraping works

This commit is contained in:
Sander Roosendaal
2022-06-25 16:07:11 +02:00
parent 0b88f3c861
commit 44d3dfc919
8 changed files with 172 additions and 8 deletions

View File

@@ -0,0 +1,33 @@
# Generated by Django 3.2.12 on 2022-06-25 13:28
import boatmovers.models
from django.db import migrations, models
class Migration(migrations.Migration):
    """Schema changes for the ``boatmovers`` app applied after
    ``0011_alter_race_processed``.

    Alters three existing fields (``Crew.athletes``, ``Race.date``,
    ``Race.resulturl``) and replaces the ``unique_together`` constraint
    on ``Result``.
    """

    # Must run after the migration that altered Race.processed.
    dependencies = [
        ('boatmovers', '0011_alter_race_processed'),
    ]

    operations = [
        # Reverse accessor from Athlete to its crews is named 'crews'.
        migrations.AlterField(
            model_name='crew',
            name='athletes',
            field=models.ManyToManyField(
                related_name='crews',
                to='boatmovers.Athlete',
            ),
        ),
        # Race date defaults to the value of boatmovers.models.current_day.
        migrations.AlterField(
            model_name='race',
            name='date',
            field=models.DateField(
                default=boatmovers.models.current_day,
                verbose_name='Race Date',
            ),
        ),
        # The results link is nullable.
        migrations.AlterField(
            model_name='race',
            name='resulturl',
            field=models.URLField(
                null=True,
                verbose_name='URL Link to results',
            ),
        ),
        # A (crew, order) pair may appear only once among results.
        migrations.AlterUniqueTogether(
            name='result',
            unique_together={('crew', 'order')},
        ),
    ]

View File

@@ -0,0 +1,18 @@
# Generated by Django 3.2.12 on 2022-06-25 13:31
from django.db import migrations, models
class Migration(migrations.Migration):
    """Rename the reverse accessor of ``Crew.athletes``.

    Applied after ``0012_auto_20220625_1328``; the only operation changes
    the ``related_name`` of the many-to-many field to ``'athlete_crews'``.
    """

    dependencies = [
        ('boatmovers', '0012_auto_20220625_1328'),
    ]

    operations = [
        # Athlete instances reach their crews through `athlete_crews`.
        migrations.AlterField(
            model_name='crew',
            name='athletes',
            field=models.ManyToManyField(
                related_name='athlete_crews',
                to='boatmovers.Athlete',
            ),
        ),
    ]

View File

@@ -2,6 +2,8 @@ from django.db import models
from django import forms
from django.core.exceptions import ValidationError
import collections
import datetime
from django.utils import timezone
import trueskill
@@ -43,7 +45,7 @@ class athleteForm(forms.ModelForm):
fields = ['first_name','last_name','club','birth_year']
class Crew(models.Model):
athletes = models.ManyToManyField(Athlete)
athletes = models.ManyToManyField(Athlete, related_name='athlete_crews')
name = models.CharField(max_length=200)
def __str__(self):
@@ -90,10 +92,10 @@ class Race(models.Model):
"Cannot have the same crew more than one time in a race"
)
if len(athletes) != len(set(athletes)):
raise ValidationError(
"Cannot have the same athlete in different crews in a race"
)
#if len(athletes) != len(set(athletes)):
# raise ValidationError(
# "Cannot have the same athlete in different crews in a race"
# )
super(Race, self).save(*args, **kwargs)
@@ -208,6 +210,7 @@ class Result(models.Model):
"Cannot have the same crew more than one time in a race"
)
if len(athletes) != len(set(athletes)):
print([item for item, count in collections.Counter(athletes).items() if count>1])
raise ValidationError(
"Cannot have the same athlete in different crews in a race"
)

76
boatmovers/scrapers.py Normal file
View File

@@ -0,0 +1,76 @@
import re
from bs4 import BeautifulSoup
import requests
from boatmovers.models import *
import pandas as pd
# NOTE(review): presumably a placeholder for a Heineken regatta results URL;
# it is empty and not referenced by the code visible in this file — confirm.
url_heineken = ''
def _split_athlete_name(raw):
    """Turn one cell of an entry table's 'naam' column into (first_name, last_name).

    * Text is split on single spaces: the last token is the last name and
      everything before it is the first name (a one-word name therefore
      yields an empty first name).
    * A missing value (None / NaN) becomes ('Unknown', 'Athlete').
    * Any other value is stringified and used as the first name.
    """
    if isinstance(raw, str):
        parts = raw.split(' ')
        return ' '.join(parts[:-1]), parts[-1]
    if pd.isna(raw):
        return 'Unknown', 'Athlete'
    # The builtin str() must stay un-shadowed in this module for this to work.
    return str(raw), ''


def time_team_scraper(url, raceid, timeout=30):
    """Import the crews listed on a results page into an existing race.

    The page at ``url`` is fetched and its first ``<table>`` is scanned.
    Every cell whose anchor ``href`` contains ``'entry'`` is treated as one
    crew, in finishing order: the linked entry page is read with
    ``pandas.read_html``, a ``Crew`` is created, up to ``race.crew_size``
    athletes from the ``'naam'`` column are looked up (or created) and
    attached, and a ``Result`` with the running ``order`` is saved.

    Args:
        url: Address of the results page. It must contain the text
            ``'results'``; everything before its last occurrence is used
            as the base for entry links.
        raceid: Primary key of the ``Race`` the results belong to.
        timeout: Seconds to wait for the results page before giving up.

    Raises:
        Race.DoesNotExist: No race has the given id.
        ValueError: ``url`` does not contain ``'results'`` or the page has
            no table.
        requests.RequestException: The results page could not be fetched.

    Progress is reported with ``print``; a ``ValidationError`` raised while
    saving a result is printed and the import continues with the next crew.
    """
    race = Race.objects.get(id=raceid)
    crew_size = race.crew_size

    # Validate the URL before touching the network.
    url_match = re.search('(.*)results(.*)', url)
    if url_match is None:
        raise ValueError("Results URL must contain 'results': %r" % (url,))
    base = url_match.group(1)

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, features='lxml')
    table = soup.find('table')
    if table is None:
        raise ValueError("No results table found at %r" % (url,))

    order = 1
    for row in table.find_all('tr'):
        for cell in row.find_all('td'):
            anchor = cell.find('a')
            # Cells without a usable link are not crew entries.
            if anchor is None or not anchor.contents:
                continue
            link = anchor.get('href', '')
            if 'entry' not in link:
                continue
            name = anchor.contents[0]

            print(order, name)
            # NOTE(review): the first three characters of the link are
            # dropped — presumably a leading '../'; confirm against the site.
            entry = pd.read_html(base + link[3:])[0]

            crew = Crew(name=name)
            crew.save()

            # head() tolerates entry tables with fewer rows than crew_size.
            for raw_name in entry['naam'].head(crew_size):
                first_name, last_name = _split_athlete_name(raw_name)
                athlete = Athlete.objects.filter(
                    first_name=first_name,
                    last_name=last_name,
                ).first()
                if athlete is None:
                    # New athletes get the crew name as their club.
                    athlete = Athlete(
                        first_name=first_name,
                        last_name=last_name,
                        club=name,
                    )
                    athlete.save()
                print(athlete)
                crew.athletes.add(athlete)

            result = Result(crew=crew, race=race, order=order)
            try:
                result.save()
            except ValidationError as e:
                # Best effort: report the problem and keep importing.
                print(e)
            order += 1
            print('')

View File

@@ -18,6 +18,9 @@
<td>{{ athlete.last_name }}</td>
<td>{{ athlete.club }}</td>
<td>{{ athlete.gender }}</td>
{% if user.is_authenticated and user.is_staff %}
<td><a href="/admin/boatmovers/athlete/{{ athlete.id }}/change/">edit</a></td>
{% endif %}
</tr>
{% endfor %}
</table>

View File

@@ -14,6 +14,11 @@
<tr>
<td>{{ result.order }}</td>
<td><a href="/boatmovers/crew/{{ result.crew.id }}/">{{ result.crew.name }}</a></td>
{% if result.crew.id in duplicate_crews %}
<td>!</td>
{% elif result.crew.id in duplicate_athletes_crews %}
<td>!</td>
{% endif %}
</tr>
{% endfor %}
</table>

View File

@@ -3,6 +3,8 @@ from django.http import HttpResponse, HttpResponseRedirect
from django.shortcuts import get_object_or_404
from django.urls import reverse
import collections
# Create your views here.
from django.views.generic.edit import CreateView
from boatmovers.models import Athlete, Crew, Race, Result
@@ -50,7 +52,7 @@ class ResultCreateView(CreateView):
success_url = '/boatmovers/'
def boatmovers_view(request):
athletes = Athlete.objects.all().order_by('-trueskill_exposed','-birth_year','last_name','first_name')
athletes = Athlete.objects.filter(trueskill_exposed__gt=0).order_by('-trueskill_exposed','-birth_year','last_name','first_name')
races = Race.objects.filter(verified=True,processed=True).order_by('-date')
new_races = Race.objects.filter(processed=False).order_by('date')
@@ -66,10 +68,34 @@ def race_view(request,id=0):
race = get_object_or_404(Race, pk=id)
results = race.results.all().order_by('order')
crews = []
athletes = []
for result in results:
crews.append(result.crew.id)
for athlete in result.crew.athletes.all():
athletes.append(athlete.id)
# duplicates
duplicate_athletes = [item for item, count in collections.Counter(athletes).items() if count>1]
duplicate_crews = [item for item, count in collections.Counter(crews).items() if count>1]
duplicate_athletes_crews = []
for athlete_id in duplicate_athletes:
athlete = Athlete.objects.get(id=athlete_id)
crews = [crew.id for crew in athlete.athlete_crews.all()]
for crew in crews:
duplicate_athletes_crews.append(crew)
return render(request,
'race.html',
{'race':race,
'results':results}
{
'race':race,
'results':results,
'duplicate_athletes':duplicate_athletes,
'duplicate_crews':duplicate_crews,
'duplicate_athletes_crews':duplicate_athletes_crews
}
)
def race_verify(request, id=0):

BIN
db.sqlite3-journal Normal file

Binary file not shown.