From 098dbb9c4a0c36873dbb14770bc4185533046c0b Mon Sep 17 00:00:00 2001 From: Sander Roosendaal Date: Wed, 13 Dec 2017 13:40:07 +0100 Subject: [PATCH] age group records from C2 database web scraping --- rowers/interactiveplots.py | 2 +- rowers/metrics.py | 2 +- rowers/models.py | 85 ++++++++++++++++++++++++-------------- rowers/urls.py | 4 ++ rowers/views.py | 29 +++++++++---- 5 files changed, 80 insertions(+), 42 deletions(-) diff --git a/rowers/interactiveplots.py b/rowers/interactiveplots.py index 3b187788..3fad9778 100644 --- a/rowers/interactiveplots.py +++ b/rowers/interactiveplots.py @@ -1071,7 +1071,7 @@ def interactive_otwcpchart(powerdf,promember=0): def interactive_agegroup_plot(df): - age = df['agemin'] + age = df['age'] power = df['power'] poly_coefficients = np.polyfit(age,power,6) diff --git a/rowers/metrics.py b/rowers/metrics.py index ee4233bd..e4efe26f 100644 --- a/rowers/metrics.py +++ b/rowers/metrics.py @@ -329,7 +329,7 @@ def getagegroup2k(age,sex='male',weightcategory='hwt'): ) ) - ages = df['agemin'] + ages = df['age'] powers = df['power'] poly_coefficients = np.polyfit(ages,powers,6) diff --git a/rowers/models.py b/rowers/models.py index 5227e829..8e024c1d 100644 --- a/rowers/models.py +++ b/rowers/models.py @@ -151,16 +151,54 @@ class PowerZonesField(models.TextField): value = self._get_val_from_obj(obj) return self.get_deb_prep_value(value) -# Age records -def save_agegroup(df,weightcategory,sex): - for id,row in df.iterrows(): - agemin = int(row['Age2']) - agemax = int(row['Age3']) - duration = row['Time'] - power = int(row['Power']) - season = int(row['Season']) - name = row['Name'] - distance = int(row['Distance']) + +c2url = 'http://www.concept2.com/indoor-rowers/racing/records/world?machine=1&event=All&gender=All&age=All&weight=All' + +def update_records(url=c2url): + try: + dfs = pd.read_html(url,attrs={'class':'views-table'}) + df = dfs[0] + df.columns = df.columns.str.strip() + success = 1 + except: + df = pd.DataFrame() + + if not df.empty: + C2WorldClassAgePerformance.objects.all().delete() + + df.Gender = df.Gender.apply(lambda x: 'male' if x=='M' else 'female') + df['Distance'] = df['Event'] + df['Duration'] = 0 + + for nr,row in df.iterrows(): + if 'm' in row['Record']: + df.ix[nr,'Distance'] = row['Record'][:-1] + df.ix[nr,'Duration'] = 60*row['Event'] + else: + df.ix[nr,'Distance'] = row['Event'] + try: + tobj = datetime.datetime.strptime(row['Record'],'%M:%S.%f') + except ValueError: + tobj = datetime.datetime.strptime(row['Record'],'%H:%M:%S.%f') + df.ix[nr,'Duration'] = 3600.*tobj.hour+60.*tobj.minute+tobj.second+tobj.microsecond/1.e6 + + print row.Duration + for nr,row in df.iterrows(): + try: + weightcategory = row.Weight.lower() + except AttributeError: + weightcategory = 'hwt' + + sex = row.Gender + name = row.Name + age = int(row.Age) + distance = int(row.Distance) + duration = float(row.Duration) + season = int(row.Season) + + velo = distance/duration + power = int(2.8*velo**3) + record = C2WorldClassAgePerformance( age = age, weightcategory = weightcategory, @@ -171,26 +209,11 @@ def save_agegroup(df,weightcategory,sex): season = season, name = name, ) - record.save() - print record - -def make_records(readfile): - xls = pd.ExcelFile(readfile) - female_df = xls.parse('Female') - female_lw_df = xls.parse('Female LW') - male_df = xls.parse('Male') - male_lw_df = xls.parse('Male LW') - save_agegroup(female_df,'hwt','female') - save_agegroup(male_df,'hwt','male') - save_agegroup(female_lw_df,'lwt','female') - save_agegroup(male_lw_df,'lwt','male') - -c2url = 'http://www.concept2.com/indoor-rowers/racing/records/world?machine=1&event=All&gender=All&age=All&weight=All' - -def update_records(url=c2url): - dfs = pd.read_html(url,attrs={'class':'views-table'}) - df = dfs[0] - df.columns = df.columns.str.strip() + try: + record.save() + except: + print record + class C2WorldClassAgePerformance(models.Model): weightcategories = ( @@ -215,7 +238,7 @@ class C2WorldClassAgePerformance(models.Model): distance = models.IntegerField(default=2000) name = models.CharField(max_length=200,blank=True) - duration = models.TimeField(default=1,blank=True) + duration = models.FloatField(default=1,blank=True) season = models.IntegerField(default=2013) power = models.IntegerField(default=200) diff --git a/rowers/urls.py b/rowers/urls.py index 51371b36..ac1c26b7 100644 --- a/rowers/urls.py +++ b/rowers/urls.py @@ -121,6 +121,10 @@ urlpatterns = [ url(r'^400/$', TemplateView.as_view(template_name='400.html'),name='400'), url(r'^403/$', TemplateView.as_view(template_name='403.html'),name='403'), url(r'^imports/$', TemplateView.as_view(template_name='imports.html'), name='imports'), + url(r'^agegrouprecords/(?P\w+.*)/(?P\w+.*)/(?P\d+)m$', + views.agegrouprecordview), + url(r'^agegrouprecords/(?P\w+.*)/(?P\w+.*)/(?P\d+)min$', + views.agegrouprecordview), url(r'^agegrouprecords/(?P\w+.*)/(?P\w+.*)$', views.agegrouprecordview), url(r'^list-workouts/ranking$',views.workouts_view,{'rankingonly':True}), diff --git a/rowers/views.py b/rowers/views.py index 68bf1644..1d78b6b4 100644 --- a/rowers/views.py +++ b/rowers/views.py @@ -10932,19 +10932,30 @@ def team_members_stats_view(request,id): from rowers.models import C2WorldClassAgePerformance -def agegrouprecordview(request,sex='male',weightcategory='hwt'): - df = pd.DataFrame( - list( - C2WorldClassAgePerformance.objects.filter( - sex=sex, - weightcategory=weightcategory +def agegrouprecordview(request,sex='male',weightcategory='hwt', + distance=2000,duration=None): + if not duration: + df = pd.DataFrame( + list( + C2WorldClassAgePerformance.objects.filter( + distance=distance, + sex=sex, + weightcategory=weightcategory ).values() ) ) - - df['seconds'] = df['duration'].apply( - lambda x:dataprep.timedelta_to_seconds(x) + else: + duration = int(duration)*60 + df = pd.DataFrame( + list( + C2WorldClassAgePerformance.objects.filter( + duration=duration, + sex=sex, + weightcategory=weightcategory + ).values() + ) ) + script,div = interactive_agegroup_plot(df)