From 44d3dfc919646e2313cc595f306d9c79da2b3900 Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Sat, 25 Jun 2022 16:07:11 +0200 Subject: [PATCH] time team scraping works --- .../migrations/0012_auto_20220625_1328.py | 33 ++++++++ .../migrations/0013_alter_crew_athletes.py | 18 +++++ boatmovers/models.py | 13 +-- boatmovers/scrapers.py | 76 ++++++++++++++++++ boatmovers/templates/boatmovers.html | 3 + boatmovers/templates/race.html | 5 ++ boatmovers/views.py | 32 +++++++- db.sqlite3-journal | Bin 0 -> 12824 bytes 8 files changed, 172 insertions(+), 8 deletions(-) create mode 100644 boatmovers/migrations/0012_auto_20220625_1328.py create mode 100644 boatmovers/migrations/0013_alter_crew_athletes.py create mode 100644 boatmovers/scrapers.py create mode 100644 db.sqlite3-journal diff --git a/boatmovers/migrations/0012_auto_20220625_1328.py b/boatmovers/migrations/0012_auto_20220625_1328.py new file mode 100644 index 00000000..9a0b5552 --- /dev/null +++ b/boatmovers/migrations/0012_auto_20220625_1328.py @@ -0,0 +1,33 @@ +# Generated by Django 3.2.12 on 2022-06-25 13:28 + +import boatmovers.models +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('boatmovers', '0011_alter_race_processed'), + ] + + operations = [ + migrations.AlterField( + model_name='crew', + name='athletes', + field=models.ManyToManyField(related_name='crews', to='boatmovers.Athlete'), + ), + migrations.AlterField( + model_name='race', + name='date', + field=models.DateField(default=boatmovers.models.current_day, verbose_name='Race Date'), + ), + migrations.AlterField( + model_name='race', + name='resulturl', + field=models.URLField(null=True, verbose_name='URL Link to results'), + ), + migrations.AlterUniqueTogether( + name='result', + unique_together={('crew', 'order')}, + ), + ] diff --git a/boatmovers/migrations/0013_alter_crew_athletes.py b/boatmovers/migrations/0013_alter_crew_athletes.py new file mode 100644 index 00000000..8f9634c4 --- /dev/null +++ b/boatmovers/migrations/0013_alter_crew_athletes.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.12 on 2022-06-25 13:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('boatmovers', '0012_auto_20220625_1328'), + ] + + operations = [ + migrations.AlterField( + model_name='crew', + name='athletes', + field=models.ManyToManyField(related_name='athlete_crews', to='boatmovers.Athlete'), + ), + ] diff --git a/boatmovers/models.py b/boatmovers/models.py index b2664c4b..d930eab3 100644 --- a/boatmovers/models.py +++ b/boatmovers/models.py @@ -2,6 +2,8 @@ from django.db import models from django import forms from django.core.exceptions import ValidationError +import collections + import datetime from django.utils import timezone import trueskill @@ -43,7 +45,7 @@ class athleteForm(forms.ModelForm): fields = ['first_name','last_name','club','birth_year'] class Crew(models.Model): - athletes = models.ManyToManyField(Athlete) + athletes = models.ManyToManyField(Athlete, related_name='athlete_crews') name = models.CharField(max_length=200) def __str__(self): @@ -90,10 +92,10 @@ class Race(models.Model): "Cannot have the same crew more than one time in a race" ) - if len(athletes) != len(set(athletes)): - raise ValidationError( - "Cannot have the same athlete in different crews in a race" - ) + #if len(athletes) != len(set(athletes)): + # raise ValidationError( + # "Cannot have the same athlete in different crews in a race" + # ) super(Race, self).save(*args, **kwargs) @@ -208,6 +210,7 @@ class Result(models.Model): "Cannot have the same crew more than one time in a race" ) if len(athletes) != len(set(athletes)): + print([item for item, count in collections.Counter(athletes).items() if count>1]) raise ValidationError( "Cannot have the same athlete in different crews in a race" ) diff --git a/boatmovers/scrapers.py b/boatmovers/scrapers.py new file mode 100644 index 00000000..f4f9c658 --- /dev/null +++ b/boatmovers/scrapers.py @@ -0,0 +1,76 @@ +import re +from bs4 import BeautifulSoup +import requests +from boatmovers.models import * +import pandas as pd + +url_heineken = '' + +def time_team_scraper(url,raceid): + race = Race.objects.get(id=raceid) + nr = race.crew_size + r = requests.get(url) + soup = BeautifulSoup(r.content,features='lxml') + tbl = soup.find('table') + order = 1 + + str = re.search('(.*)results(.*)',url) + base = str.groups()[0] + + for tr in tbl.findAll("tr"): + trs = tr.findAll("td") + for each in trs: + try: + link = each.find('a')['href'] + name = each.find('a').contents[0] + except (TypeError, IndexError): + link = '' + name ='' + + if 'entry' in link: + print(order, name) + + dfs = pd.read_html(base+link[3:]) + df = dfs[0] + namen = df['naam'] + + crew = Crew(name=name) + crew.save() + for i in range(nr): + try: + names = df['naam'][i].split(' ') + first_name = ' '.join(names[:-1]) + last_name = names[-1] + except AttributeError: + try: + first_name = str(df['naam'][i]) + last_name = '' + except TypeError: + first_name = 'Unknown' + last_name = 'Athlete' + athletes = Athlete.objects.filter(first_name = first_name, + last_name = last_name) + if len(athletes) >= 1: + athlete = athletes[0] + else: + athlete = Athlete(first_name=first_name, + last_name=last_name, + club = name) + athlete.save() + print(athlete) + + crew.athletes.add(athlete) + + result = Result( + crew = crew, + race = race, + order = order + ) + try: + result.save() + except ValidationError as e: + print(e) + + order += 1 + + print('') diff --git a/boatmovers/templates/boatmovers.html b/boatmovers/templates/boatmovers.html index 5b133e25..87db2bb2 100644 --- a/boatmovers/templates/boatmovers.html +++ b/boatmovers/templates/boatmovers.html @@ -18,6 +18,9 @@ {{ athlete.last_name }} {{ athlete.club }} {{ athlete.gender }} + {% if user.is_authenticated and user.is_staff %} + edit + {% endif %} {% endfor %} diff --git a/boatmovers/templates/race.html b/boatmovers/templates/race.html index 347aed79..8f602c69 100644 --- a/boatmovers/templates/race.html +++ b/boatmovers/templates/race.html @@ -14,6 +14,11 @@ {{ result.order }} {{ result.crew.name }} + {% if result.crew.id in duplicate_crews %} + ! + {% elif result.crew.id in duplicate_athletes_crews %} + ! + {% endif %} {% endfor %} diff --git a/boatmovers/views.py b/boatmovers/views.py index 16d1960a..94f3cb0b 100644 --- a/boatmovers/views.py +++ b/boatmovers/views.py @@ -3,6 +3,8 @@ from django.http import HttpResponse, HttpResponseRedirect from django.shortcuts import get_object_or_404 from django.urls import reverse +import collections + # Create your views here. from django.views.generic.edit import CreateView from boatmovers.models import Athlete, Crew, Race, Result @@ -50,7 +52,7 @@ class ResultCreateView(CreateView): success_url = '/boatmovers/' def boatmovers_view(request): - athletes = Athlete.objects.all().order_by('-trueskill_exposed','-birth_year','last_name','first_name') + athletes = Athlete.objects.filter(trueskill_exposed__gt=0).order_by('-trueskill_exposed','-birth_year','last_name','first_name') races = Race.objects.filter(verified=True,processed=True).order_by('-date') new_races = Race.objects.filter(processed=False).order_by('date') @@ -66,10 +68,34 @@ def race_view(request,id=0): race = get_object_or_404(Race, pk=id) results = race.results.all().order_by('order') + crews = [] + athletes = [] + + for result in results: + crews.append(result.crew.id) + for athlete in result.crew.athletes.all(): + athletes.append(athlete.id) + + # duplicates + duplicate_athletes = [item for item, count in collections.Counter(athletes).items() if count>1] + duplicate_crews = [item for item, count in collections.Counter(crews).items() if count>1] + duplicate_athletes_crews = [] + for athlete_id in duplicate_athletes: + athlete = Athlete.objects.get(id=athlete_id) + crews = [crew.id for crew in athlete.athlete_crews.all()] + for crew in crews: + duplicate_athletes_crews.append(crew) + + return render(request, 'race.html', - {'race':race, - 'results':results} + { + 'race':race, + 'results':results, + 'duplicate_athletes':duplicate_athletes, + 'duplicate_crews':duplicate_crews, + 'duplicate_athletes_crews':duplicate_athletes_crews + } ) def race_verify(request, id=0): diff --git a/db.sqlite3-journal b/db.sqlite3-journal new file mode 100644 index 0000000000000000000000000000000000000000..18977d1d7cf8f4540a475472617c8437cc685ebb GIT binary patch literal 12824 zcmeHOdvILUdB6KO+P5yYv|g<@i&nUbW57+ZVQeKBODox0R=bvvgba2PINCkdUg_?= z^4+_Zt+oUk+7PZoJ)wD!Fo}n03$YV7nY!df2pK1C>xS^iOrT6d>6C<&hLq64(3C)b z=bXEe1#+?fY-cs&)%x7?edqgr-}8LCQk2D?y=PuA6vg+ZqIj`*(c**VyZ^@sc*!+~p~6Vao=H={p`L|QYEqro2szZ*Oj{FHxRupHbG z+!&1euJQiX_gnwrzypC#1m*%C_I@X@DR4<3((-c44_i*Qe75DFZ^HM5@P~p*%Lm9j zd5gS6o+hWsUy{%G-}GuEM}E)qglArTQGHtduKMSy3t&b zi_kjXzl2u!Rt8`2{NQ4;HtqH1O=@I{)Y8}#t7Yr8y>({S0w+Cpb&&2h@wimcs+r+h zsj`n6fs^Fvb`no{y+d`w%$b!cHTYsYLE6@My+yje-=fty%_z+<9qa9@NW7D;TlTPR zGo!+^DwZ!=2@0`Xx{(?T=aJS9(wPKXORL!$rN#)eHJjnVr74o`;(KkoqS0EPQMQ;l zSDQJ2XYWmt?zq=mV5X6QbeS@<3ZTW4i&v2p*cEBr(n>66)+ZTXeoum=vAjbonVeuJ z)h)&kNhV2(%c((GrspZOYWtX8#*>yd66bQ6%ou`d%)*-QA`%BSoJ-c^!h7Q+4LOQT zGa!OJq%*GNO^ULLB!F>IvrCmhW>Hq=*pgAnGjCesTyq&&)8cmdeTfK1bW~%; z4Bx#sNjl+ZKt!+c?PE4$hQ^;%<0OG6Li{09pMeu%wUQ)h4z_TV>DmP3XT}u8lc_jK z33n6L9WrMHvSq0eoM#Ulc0#L3Hj^o`R&2H_Y%T*+Y6csr8^RZFjFY&CnToNC=@rwe zahQM6MiQV9;*8B|Tvh+`?vc`=M)W%Od;d=Z4=XK-vVz39*P3OmNCk(>J4izKxHV&D z+5DtM%UX#`ez)sD)M7IuX4#@8;rDl~Cfzu3NT;-xF=yfT2GlKcPyU#sDAY@|mbG9I zTAju6?FrJ!Db5ZrZ1?=8ldOYALvSTwl2JGbe9E$QYQX(H-koRCN-?28*COnGuO_uArj#GW7(&RF1baR5Msg;vP_kFwJ z;s}*2c*{iS?@o~}?jdzu%ZyN+*0<}X#f*8b#vKWg6wjzprF}Xm%H}*!7+ym*io#th)`*aoX5xvC=Nb;;=OngyX~AuJ+2p41A8aRW!dKx2 zCbMb+nU_|OHt2-CePt%F{oI1)Tv3D?G(VCgUE(my(rm7Hff)va8~I*o1b}4>nDx1u zZE>U?SWP;`QSdIY9XY9D)_H7fXoM;r4e4fe_SBc4&G$lEm6c1|lLSct1Fkkq26vnl z(dh4$nMx#q0hB0XM)8 zi=e+BuEk4Z-mDm7bU)K4QKS4fEG`x(vZlph+1aQ76=Tq%6^i8G`P0RQ$IGhk(Ul}26or=8=J)Ku&YL8FJ4}loG0H1|$lJdo4Ilu_qg2S%W1%|~ zqFhS)Vdf$Q0Yei2FPUK$Dt7}ptH|oeq2QjC35w_E8j6r*FxPwzsngnxX5B!9`aK;a1!od9lUF<>ec=keTUL=|!$pj7 zX6Yyt$ZRKMF)V86IQlHBZfjsN;Wijp95-znPU;Uzc=^6Tkv6cFm+s8hwwo3z7&orJ zRW<|pQx9Ru$m-Odcl(yF%fOJ4!VQAV1-Kx^mr^9f8z7nQhf$cBRiPk}3KjDFv)irc@mfK3KUG(}sQUAGBb}2I1piL~WDl#_@g)3dV0F?nY z0|&&~-ENVQ>br4l6{&Q_Pc1F57SPAy2rm zTEx~YvYajQz$c}f3n;UQccfY2d!Ek&iPa?84SkVX*qm$&GHzeczLF%-5Ax)7z!*1? zC_M`u7K}2$GTbPc4UpwWZW|@@+@Og@4=&>Wqeh2a9+>r3vCZNY!uNsVKmQc(oZ>KrbCl`y2-fzKHfT`v@JWWCVud6NZ;4ULt3Zk2X6(({F`8&iM`k5k{ zcOXADi#8{4@~VbyVXK9XgFMz)6&6QZ!~0;r-l!{L6+t_P#j9jKD%6FoxlwDf&-Oe4 zrI(C6mU!2gRz+(Mv;MSETqRcvY()qBlq`D%wttlJc0dMSqJyN7XB(F)I0qHC1JC1) zn5M>r*|;+N6^D8a@;LUpfokxEgJ<|}CP~{`@#K1*8;nVF3O6g>4<2uvVHmvnnAYxIUEc9MtNV$t-fHNdPcoFNYveG zruFB6zi+*-b$9E^z#qpRjeRWE8+{|tAAKZxD7ro1jYcD9B6miLEvF+Z!#@t+-EwPq zINTOG6Z(8e5A}tj!KX1V_=C5(Q}e$~1eypm5ojXNL|_R5{mzYHWldLCz#9t$Mlmc} zzf;5DsXC~m<3>$2YP)t#WJb}TF{5{L@8e(HJ%tQD%#^* zA662{2A68d+(FBj!kkSz*fVp_bA6zF?&|4-zy9RSp#9s<(&{bOI3r;txu#P{Fot;n zvxep<@1zY%?VXhB?=NCb)Vuj;$MKWpA{hMJ{bzP<3}$*ePwd}!+qb^m>lDJu`ZmcR zi-{DUzc!lg5n4MB7FlNyGNj%wWEiiY!K;n)7Kj<66Pa<`7BxN^7)R@3^=|&ji517L z^g)hakKF$j2_EU~{PHu2`)O~pKK?HFzi|2yqw=W0OTDQWv~Tp1k? z?)kVYFCoQn$~|_raqMMIDXg@0c6q$9U|`&=4zDfJifOO|qv(@)yP4UL44mxE9OFO7 zFLNewP)hE`cpD=ebnO`4F1z~*hr+Mi-Fesp!tlGnm`*XyvX-UU;M^Qmx?FksD~(wn z_U@T6(X}s2^gib%h!vM&ZDTq*ZaPz2cI)NNN5V>{yETv7GA0FOmO}@3j_n@~E2*U9 zj9!^fc4SYw?DqA}hhYA++&+LAj$R(Y}>>MpR-FqD14B^E~7bzt9+UF=w}C{2dYJ6%eRmz|xs&ht^yyOCby^g{5sTUG1YB!Za_h2D+sO6M9J z>C#PTCELUxnZ0W@-RNxLg-9NUkD%(caeTJ72T$DuNB|>YaG?hSdL7 zUsGRJpHxq)kKmgCC)E4YXVo9#TLBNK&#T{3?^RE#pH`2kcdECmx15u2^Fb4VCIU?a zng}!zXd=)=pou^efhGb?1eypm5%|A`0KU(sKFhz|ip%&thbws9co^S+_H-V8FJ9A! z!+3DX$3KYoN#|y~zw6L2-iHpy@w(FCcjOxmb>a2;Ln>AhP7d!kALQ%yp^bR`qSJ%* bl!L$a@N_sgV5R$D0k4TXUg8(?zXSOnyG0?u literal 0 HcmV?d00001