import re
from bs4 import BeautifulSoup
import requests
# NOTE: wildcard import kept as-is; the code below relies on it for
# Race, Crew, Athlete and Result.
from boatmovers.models import *
import pandas as pd
from django.core.exceptions import ValidationError

url_heineken = ''


def _split_athlete_name(value):
    """Turn one raw name cell into ``(first_name, last_name, dummy)``.

    * A string is split on single spaces: the last token becomes the last
      name, everything before it the first name.
    * A missing cell (``None`` / NaN) yields the placeholder athlete
      ``('Unknown', 'Athlete', True)``.
    * Any other value is stringified and used as the first name.
    """
    if isinstance(value, str):
        parts = value.split(' ')
        return ' '.join(parts[:-1]), parts[-1], False
    if value is None or pd.isna(value):
        return 'Unknown', 'Athlete', True
    return str(value), '', False


def _get_or_create_athlete(first_name, last_name, club, gender, dummy,
                           **extra_lookup):
    """Return a matching Athlete, creating and saving one if none exists.

    ``extra_lookup`` holds additional filter arguments for the lookup (the
    CSV importer also matches on gender, the scraper does not).
    """
    matches = Athlete.objects.filter(
        first_name=first_name, last_name=last_name, **extra_lookup)
    if matches:
        return matches[0]

    athlete = Athlete(first_name=first_name, last_name=last_name,
                      club=club, gender=gender, dummy=dummy)
    try:
        athlete.save()
    except ValidationError as e:
        # The code relies on the message ending in ":<id>" of an existing
        # athlete -- presumably produced by Athlete's own validation; confirm
        # against the model. rsplit keeps working if the text has a colon.
        _, existing_id = e.message.rsplit(':', 1)
        athlete = Athlete.objects.get(id=existing_id.strip())
    return athlete


def _save_result(crew, race, order, startorder):
    """Store a Result for ``crew`` unless ``order`` is below ``startorder``.

    A ValidationError raised while saving is printed, not propagated.
    """
    result = Result(crew=crew, race=race, order=order)
    if order >= startorder:
        try:
            result.save()
        except ValidationError as e:
            print(e)


def csv_reader(filename, raceid, clubcol='Ploeg', bankjes=('Slag',),
               uitslag='Pos', gender='m', startorder=1):
    """Import the crews and results of one race from a CSV file.

    Every row of the CSV becomes a Crew; for each of the race's
    ``crew_size`` seats an Athlete is looked up (by first name, last name
    and gender) or created, and a Result is saved for the crew.

    Args:
        filename: path or buffer accepted by ``pandas.read_csv``.
        raceid: id of the Race the results belong to.
        clubcol: name of the column holding the crew name.
        bankjes: column names holding the athlete name of each seat; it is
            indexed once per seat, so it needs at least ``crew_size``
            entries.
        uitslag: name of the column holding the finishing order.
        gender: gender used for looking up and creating athletes.
        startorder: rows whose order is below this value get no Result.

    Raises:
        KeyError / IndexError: when a required column is missing or
            ``bankjes`` is too short. Columns are resolved before any row
            is processed, so nothing is written in that case.
    """
    race = Race.objects.get(id=raceid)
    nr = race.crew_size
    df = pd.read_csv(filename)
    # replace column names if different

    # Resolve all column positions once instead of for every row.
    order_pos = df.columns.get_loc(uitslag)
    club_pos = df.columns.get_loc(clubcol)
    seat_positions = [df.columns.get_loc(bankjes[i]) for i in range(nr)]

    for row in df.itertuples(index=False):
        order = row[order_pos]
        crewname = row[club_pos]
        crew = Crew(name=crewname)
        crew.save()

        for seat_pos in seat_positions:
            # Read the seat's own column (the fallback used to be
            # hard-wired to the 'Slag' column).
            first_name, last_name, dummy = _split_athlete_name(row[seat_pos])
            athlete = _get_or_create_athlete(
                first_name, last_name, crewname, gender, dummy,
                gender=gender)
            print(athlete)
            crew.athletes.add(athlete)

        _save_result(crew, race, order, startorder)
        print(' ')


def time_team_scraper(url, raceid, gender='m', startorder=1):
    """Import the crews and results of one race from a results web page.

    The first ``<table>`` of the page at ``url`` is scanned cell by cell.
    Every cell whose link contains ``'entry'`` is treated as one crew, in
    finishing order: the linked page is read with ``pandas.read_html`` and
    the first ``crew_size`` values of its ``'naam'`` column become the
    crew's athletes.

    Args:
        url: address of the results page; it must contain ``'results'``,
            the part before it is used as base for the entry links.
        raceid: id of the Race the results belong to.
        gender: gender assigned to newly created athletes.
        startorder: crews whose order is below this value get no Result.
    """
    race = Race.objects.get(id=raceid)
    nr = race.crew_size
    r = requests.get(url)
    soup = BeautifulSoup(r.content, features='lxml')
    tbl = soup.find('table')
    order = 1
    # Named `match`, not `str`: shadowing the builtin broke the str() call
    # used for non-string names further down.
    match = re.search(r'(.*)results(.*)', url)
    base = match.groups()[0]

    for tr in tbl.find_all("tr"):
        for cell in tr.find_all("td"):
            anchor = cell.find('a')
            try:
                link = anchor['href']
                name = anchor.contents[0]
            except (TypeError, IndexError):
                # No anchor in this cell, or an anchor without content.
                link = ''
                name = ''
            if 'entry' not in link:
                continue

            print(order, name)
            # The first three characters of the link are dropped before it
            # is appended to the base -- presumably a leading '../'.
            dfs = pd.read_html(base + link[3:])
            namen = dfs[0]['naam']
            crew = Crew(name=name)
            crew.save()

            for i in range(nr):
                first_name, last_name, dummy = _split_athlete_name(namen[i])
                # NOTE(review): unlike csv_reader this lookup does not
                # filter on gender; kept as-is, confirm whether intended.
                athlete = _get_or_create_athlete(
                    first_name, last_name, name, gender, dummy)
                print(athlete)
                crew.athletes.add(athlete)

            _save_result(crew, race, order, startorder)
            order += 1
            print('')