Private
Public Access
1
0

age group records from C2 database web scraping

This commit is contained in:
Sander Roosendaal
2017-12-13 13:40:07 +01:00
parent 4b50d6694e
commit 098dbb9c4a
5 changed files with 80 additions and 42 deletions

View File

@@ -151,16 +151,54 @@ class PowerZonesField(models.TextField):
value = self._get_val_from_obj(obj)
return self.get_deb_prep_value(value)
# Age records
def save_agegroup(df,weightcategory,sex):
for id,row in df.iterrows():
agemin = int(row['Age2'])
agemax = int(row['Age3'])
duration = row['Time']
power = int(row['Power'])
season = int(row['Season'])
name = row['Name']
distance = int(row['Distance'])
c2url = 'http://www.concept2.com/indoor-rowers/racing/records/world?machine=1&event=All&gender=All&age=All&weight=All'
def update_records(url=c2url):
try:
dfs = pd.read_html(url,attrs={'class':'views-table'})
df = dfs[0]
df.columns = df.columns.str.strip()
success = 1
except:
df = pd.DataFrame()
if not df.empty:
C2WorldClassAgePerformance.objects.all().delete()
df.Gender = df.Gender.apply(lambda x: 'male' if x=='M' else 'female')
df['Distance'] = df['Event']
df['Duration'] = 0
for nr,row in df.iterrows():
if 'm' in row['Record']:
df.ix[nr,'Distance'] = row['Record'][:-1]
df.ix[nr,'Duration'] = 60*row['Event']
else:
df.ix[nr,'Distance'] = row['Event']
try:
tobj = datetime.datetime.strptime(row['Record'],'%M:%S.%f')
except ValueError:
tobj = datetime.datetime.strptime(row['Record'],'%H:%M:%S.%f')
df.ix[nr,'Duration'] = 3600.*tobj.hour+60.*tobj.minute+tobj.second+tobj.microsecond/1.e6
print row.Duration
for nr,row in df.iterrows():
try:
weightcategory = row.Weight.lower()
except AttributeError:
weightcategory = 'hwt'
sex = row.Gender
name = row.Name
age = int(row.Age)
distance = int(row.Distance)
duration = float(row.Duration)
season = int(row.Season)
velo = distance/duration
power = int(2.8*velo**3)
record = C2WorldClassAgePerformance(
age = age,
weightcategory = weightcategory,
@@ -171,26 +209,11 @@ def save_agegroup(df,weightcategory,sex):
season = season,
name = name,
)
record.save()
print record
def make_records(readfile):
    """Read the four age-group sheets of an Excel workbook and save them.

    ``readfile`` is passed straight to ``pd.ExcelFile``.  The sheets
    'Female', 'Female LW', 'Male' and 'Male LW' are parsed, and each
    resulting frame is handed to ``save_agegroup`` together with its
    weight category ('hwt' or 'lwt') and sex ('female' or 'male').
    """
    workbook = pd.ExcelFile(readfile)

    # Parse every sheet up front, so a missing sheet raises before
    # anything is handed to save_agegroup.
    sheet_names = ('Female', 'Female LW', 'Male', 'Male LW')
    women, women_lw, men, men_lw = [
        workbook.parse(sheet_name) for sheet_name in sheet_names
    ]

    # Save order: both heavyweight groups first, then the lightweights.
    jobs = (
        (women, 'hwt', 'female'),
        (men, 'hwt', 'male'),
        (women_lw, 'lwt', 'female'),
        (men_lw, 'lwt', 'male'),
    )
    for frame, weightcategory, sex in jobs:
        save_agegroup(frame, weightcategory, sex)
# Default page scraped by update_records.
c2url = 'http://www.concept2.com/indoor-rowers/racing/records/world?machine=1&event=All&gender=All&age=All&weight=All'


def update_records(url=c2url):
    """Fetch the records table from ``url`` and tidy its column names.

    Reads the HTML tables carrying the CSS class ``views-table``, takes
    the first one and strips surrounding whitespace from its column
    labels.  Nothing is returned or stored by this function.
    """
    tables = pd.read_html(url, attrs={'class': 'views-table'})
    records = tables[0]
    stripped_columns = records.columns.str.strip()
    records.columns = stripped_columns
try:
record.save()
except:
print record
class C2WorldClassAgePerformance(models.Model):
weightcategories = (
@@ -215,7 +238,7 @@ class C2WorldClassAgePerformance(models.Model):
distance = models.IntegerField(default=2000)
name = models.CharField(max_length=200,blank=True)
duration = models.TimeField(default=1,blank=True)
duration = models.FloatField(default=1,blank=True)
season = models.IntegerField(default=2013)
power = models.IntegerField(default=200)