time team scraping works
This commit is contained in:
33
boatmovers/migrations/0012_auto_20220625_1328.py
Normal file
33
boatmovers/migrations/0012_auto_20220625_1328.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# Generated by Django 3.2.12 on 2022-06-25 13:28
|
||||||
|
|
||||||
|
import boatmovers.models
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Schema migration 0012 for the ``boatmovers`` app.

    Alters ``Crew.athletes``, ``Race.date`` and ``Race.resulturl``, and sets
    the ``unique_together`` option of ``Result`` to ``(crew, order)``.
    The operations are listed in the order Django generated them.
    """

    dependencies = [('boatmovers', '0011_alter_race_processed')]

    operations = [
        # Crew.athletes: reverse accessor on Athlete is named ``crews``.
        migrations.AlterField(
            model_name='crew',
            name='athletes',
            field=models.ManyToManyField(
                to='boatmovers.Athlete',
                related_name='crews',
            ),
        ),
        # Race.date: default comes from the callable boatmovers.models.current_day.
        migrations.AlterField(
            model_name='race',
            name='date',
            field=models.DateField(
                verbose_name='Race Date',
                default=boatmovers.models.current_day,
            ),
        ),
        # Race.resulturl: nullable URL field.
        migrations.AlterField(
            model_name='race',
            name='resulturl',
            field=models.URLField(
                verbose_name='URL Link to results',
                null=True,
            ),
        ),
        # Result: the pair (crew, order) must be unique.
        migrations.AlterUniqueTogether(
            name='result',
            unique_together={('crew', 'order')},
        ),
    ]
|
||||||
18
boatmovers/migrations/0013_alter_crew_athletes.py
Normal file
18
boatmovers/migrations/0013_alter_crew_athletes.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 3.2.12 on 2022-06-25 13:31
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Schema migration 0013 for the ``boatmovers`` app.

    Changes the ``related_name`` of ``Crew.athletes`` to ``athlete_crews``.
    """

    dependencies = [('boatmovers', '0012_auto_20220625_1328')]

    operations = [
        # Crew.athletes: reverse accessor on Athlete becomes ``athlete_crews``.
        migrations.AlterField(
            model_name='crew',
            name='athletes',
            field=models.ManyToManyField(
                to='boatmovers.Athlete',
                related_name='athlete_crews',
            ),
        ),
    ]
|
||||||
@@ -2,6 +2,8 @@ from django.db import models
|
|||||||
from django import forms
|
from django import forms
|
||||||
from django.core.exceptions import ValidationError
|
from django.core.exceptions import ValidationError
|
||||||
|
|
||||||
|
import collections
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
import trueskill
|
import trueskill
|
||||||
@@ -43,7 +45,7 @@ class athleteForm(forms.ModelForm):
|
|||||||
fields = ['first_name','last_name','club','birth_year']
|
fields = ['first_name','last_name','club','birth_year']
|
||||||
|
|
||||||
class Crew(models.Model):
|
class Crew(models.Model):
|
||||||
athletes = models.ManyToManyField(Athlete)
|
athletes = models.ManyToManyField(Athlete, related_name='athlete_crews')
|
||||||
name = models.CharField(max_length=200)
|
name = models.CharField(max_length=200)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
@@ -90,10 +92,10 @@ class Race(models.Model):
|
|||||||
"Cannot have the same crew more than one time in a race"
|
"Cannot have the same crew more than one time in a race"
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(athletes) != len(set(athletes)):
|
#if len(athletes) != len(set(athletes)):
|
||||||
raise ValidationError(
|
# raise ValidationError(
|
||||||
"Cannot have the same athlete in different crews in a race"
|
# "Cannot have the same athlete in different crews in a race"
|
||||||
)
|
# )
|
||||||
|
|
||||||
super(Race, self).save(*args, **kwargs)
|
super(Race, self).save(*args, **kwargs)
|
||||||
|
|
||||||
@@ -208,6 +210,7 @@ class Result(models.Model):
|
|||||||
"Cannot have the same crew more than one time in a race"
|
"Cannot have the same crew more than one time in a race"
|
||||||
)
|
)
|
||||||
if len(athletes) != len(set(athletes)):
|
if len(athletes) != len(set(athletes)):
|
||||||
|
print([item for item, count in collections.Counter(athletes).items() if count>1])
|
||||||
raise ValidationError(
|
raise ValidationError(
|
||||||
"Cannot have the same athlete in different crews in a race"
|
"Cannot have the same athlete in different crews in a race"
|
||||||
)
|
)
|
||||||
|
|||||||
76
boatmovers/scrapers.py
Normal file
76
boatmovers/scrapers.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
import re
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from boatmovers.models import *
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
url_heineken = ''
|
||||||
|
|
||||||
|
def _split_athlete_name(raw_name):
    """Split one cell of the ``naam`` column into ``(first_name, last_name)``.

    Strings are split on their last space: everything before it is the first
    name, the remainder is the last name (a value without spaces yields an
    empty first name). Missing values (``None``/NaN) map to the placeholder
    ``('Unknown', 'Athlete')``; any other value is stringified into the first
    name with an empty last name.
    """
    if isinstance(raw_name, str):
        first_name, _, last_name = raw_name.rpartition(' ')
        return first_name, last_name
    if pd.isna(raw_name):
        return 'Unknown', 'Athlete'
    return str(raw_name), ''


def time_team_scraper(url, raceid, timeout=30):
    """Scrape a results page and store crews, athletes and results for a race.

    Every table cell whose link contains ``'entry'`` is treated as one crew,
    in finishing order. The linked entry page is read with
    ``pandas.read_html`` and the first ``race.crew_size`` values of its
    ``naam`` column become the crew's athletes. Athletes are looked up by
    first and last name and created (with the crew name as club) when no
    match exists. Progress is printed to stdout.

    Args:
        url: URL of the results page; must contain the text ``results``.
            Everything before its last occurrence is used as the base URL
            for the entry links.
        raceid: Primary key of the ``Race`` the results belong to.
        timeout: Seconds to wait for the results page before giving up.

    Raises:
        ValueError: If ``url`` does not contain ``results`` or the page
            contains no ``<table>``.
    """
    race = Race.objects.get(id=raceid)
    crew_size = race.crew_size

    # Do not bind this to the name ``str``: that shadows the builtin and
    # breaks the ``str(...)`` conversion of non-string names further down.
    match = re.search('(.*)results(.*)', url)
    if match is None:
        raise ValueError("URL does not contain 'results': %r" % (url,))
    base = match.group(1)

    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, features='lxml')
    table = soup.find('table')
    if table is None:
        raise ValueError("No <table> found at %r" % (url,))

    order = 1
    for row in table.find_all('tr'):
        for cell in row.find_all('td'):
            anchor = cell.find('a')
            # Cells without a link, or with an empty link, are not crews.
            if anchor is None or not anchor.contents:
                continue
            link = anchor.get('href', '')
            if 'entry' not in link:
                continue
            name = anchor.contents[0]
            print(order, name)

            # link[3:] drops the first three characters of the href
            # (presumably a leading '../' -- TODO confirm against the site).
            entry_table = pd.read_html(base + link[3:])[0]
            # Read the names before touching the database, and never index
            # past the rows the entry page actually lists.
            member_names = entry_table['naam'].tolist()[:crew_size]

            crew = Crew(name=name)
            crew.save()
            for raw_name in member_names:
                first_name, last_name = _split_athlete_name(raw_name)
                athlete = Athlete.objects.filter(
                    first_name=first_name,
                    last_name=last_name,
                ).first()
                if athlete is None:
                    athlete = Athlete(
                        first_name=first_name,
                        last_name=last_name,
                        club=name,
                    )
                    athlete.save()
                print(athlete)

                crew.athletes.add(athlete)

            result = Result(crew=crew, race=race, order=order)
            try:
                result.save()
            except ValidationError as e:
                # Best effort: report the rejected result and carry on.
                print(e)

            order += 1

            print('')
|
||||||
@@ -18,6 +18,9 @@
|
|||||||
<td>{{ athlete.last_name }}</td>
|
<td>{{ athlete.last_name }}</td>
|
||||||
<td>{{ athlete.club }}</td>
|
<td>{{ athlete.club }}</td>
|
||||||
<td>{{ athlete.gender }}</td>
|
<td>{{ athlete.gender }}</td>
|
||||||
|
{% if user.is_authenticated and user.is_staff %}
|
||||||
|
<td><a href="/admin/boatmovers/athlete/{{ athlete.id }}/change/">edit</a></td>
|
||||||
|
{% endif %}
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</table>
|
</table>
|
||||||
|
|||||||
@@ -14,6 +14,11 @@
|
|||||||
<tr>
|
<tr>
|
||||||
<td>{{ result.order }}</td>
|
<td>{{ result.order }}</td>
|
||||||
<td><a href="/boatmovers/crew/{{ result.crew.id }}/">{{ result.crew.name }}</a></td>
|
<td><a href="/boatmovers/crew/{{ result.crew.id }}/">{{ result.crew.name }}</a></td>
|
||||||
|
{% if result.crew.id in duplicate_crews %}
|
||||||
|
<td>!</td>
|
||||||
|
{% elif result.crew.id in duplicate_athletes_crews %}
|
||||||
|
<td>!</td>
|
||||||
|
{% endif %}
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</table>
|
</table>
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ from django.http import HttpResponse, HttpResponseRedirect
|
|||||||
from django.shortcuts import get_object_or_404
|
from django.shortcuts import get_object_or_404
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
|
import collections
|
||||||
|
|
||||||
# Create your views here.
|
# Create your views here.
|
||||||
from django.views.generic.edit import CreateView
|
from django.views.generic.edit import CreateView
|
||||||
from boatmovers.models import Athlete, Crew, Race, Result
|
from boatmovers.models import Athlete, Crew, Race, Result
|
||||||
@@ -50,7 +52,7 @@ class ResultCreateView(CreateView):
|
|||||||
success_url = '/boatmovers/'
|
success_url = '/boatmovers/'
|
||||||
|
|
||||||
def boatmovers_view(request):
|
def boatmovers_view(request):
|
||||||
athletes = Athlete.objects.all().order_by('-trueskill_exposed','-birth_year','last_name','first_name')
|
athletes = Athlete.objects.filter(trueskill_exposed__gt=0).order_by('-trueskill_exposed','-birth_year','last_name','first_name')
|
||||||
|
|
||||||
races = Race.objects.filter(verified=True,processed=True).order_by('-date')
|
races = Race.objects.filter(verified=True,processed=True).order_by('-date')
|
||||||
new_races = Race.objects.filter(processed=False).order_by('date')
|
new_races = Race.objects.filter(processed=False).order_by('date')
|
||||||
@@ -66,10 +68,34 @@ def race_view(request,id=0):
|
|||||||
race = get_object_or_404(Race, pk=id)
|
race = get_object_or_404(Race, pk=id)
|
||||||
results = race.results.all().order_by('order')
|
results = race.results.all().order_by('order')
|
||||||
|
|
||||||
|
crews = []
|
||||||
|
athletes = []
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
crews.append(result.crew.id)
|
||||||
|
for athlete in result.crew.athletes.all():
|
||||||
|
athletes.append(athlete.id)
|
||||||
|
|
||||||
|
# duplicates
|
||||||
|
duplicate_athletes = [item for item, count in collections.Counter(athletes).items() if count>1]
|
||||||
|
duplicate_crews = [item for item, count in collections.Counter(crews).items() if count>1]
|
||||||
|
duplicate_athletes_crews = []
|
||||||
|
for athlete_id in duplicate_athletes:
|
||||||
|
athlete = Athlete.objects.get(id=athlete_id)
|
||||||
|
crews = [crew.id for crew in athlete.athlete_crews.all()]
|
||||||
|
for crew in crews:
|
||||||
|
duplicate_athletes_crews.append(crew)
|
||||||
|
|
||||||
|
|
||||||
return render(request,
|
return render(request,
|
||||||
'race.html',
|
'race.html',
|
||||||
{'race':race,
|
{
|
||||||
'results':results}
|
'race':race,
|
||||||
|
'results':results,
|
||||||
|
'duplicate_athletes':duplicate_athletes,
|
||||||
|
'duplicate_crews':duplicate_crews,
|
||||||
|
'duplicate_athletes_crews':duplicate_athletes_crews
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def race_verify(request, id=0):
|
def race_verify(request, id=0):
|
||||||
|
|||||||
BIN
db.sqlite3-journal
Normal file
BIN
db.sqlite3-journal
Normal file
Binary file not shown.
Reference in New Issue
Block a user