Private
Public Access
1
0
This commit is contained in:
Sander Roosendaal
2022-02-15 08:05:12 +01:00
parent 5b3d7fcf2c
commit 8af7ac8af4
71 changed files with 19992 additions and 19476 deletions

View File

@@ -5,13 +5,13 @@ from __future__ import unicode_literals
from __future__ import print_function
from bs4 import BeautifulSoup
import re
from django.test import TestCase, Client,override_settings
from django.test import TestCase, Client, override_settings
from django.core.management import call_command
from django.utils.six import StringIO
from django.test.client import RequestFactory
from .views import c2_open
from rowers.models import Workout, User, Rower, WorkoutForm,RowerForm,GraphImage
from rowers.forms import DocumentsForm,CNsummaryForm,RegistrationFormUniqueEmail
from rowers.models import Workout, User, Rower, WorkoutForm, RowerForm, GraphImage
from rowers.forms import DocumentsForm, CNsummaryForm, RegistrationFormUniqueEmail
import rowers.plots as plots
import rowers.interactiveplots as iplots
import datetime
@@ -20,10 +20,10 @@ from rowingdata import rower as rrower
from django.utils import timezone
from rowers.rows import handle_uploaded_file
from django.core.files.uploadedfile import SimpleUploadedFile
from time import strftime,strptime,mktime,time,daylight
from time import strftime, strptime, mktime, time, daylight
import os
from rowers.tasks import handle_makeplot
from rowers.utils import serialize_list,deserialize_list
from rowers.utils import serialize_list, deserialize_list
from shutil import copyfile
@@ -34,7 +34,7 @@ import json
import numpy as np
from rowers import urls
from rowers.views import error500_view,error404_view,error400_view,error403_view
from rowers.views import error500_view, error404_view, error400_view, error403_view
from dataprep import delete_strokedata
@@ -43,24 +43,26 @@ redis_connection = StrictRedis()
VERBOSE = True
class TraverseLinksTest(TestCase):
def setUp(self):
self.u = User.objects.create_superuser(
'superuser1',
'superuser1@example.com','pwd')
self.r = Rower.objects.create(user=self.u,gdproptin=True,gdproptindate=timezone.now())
'superuser1@example.com', 'pwd')
self.r = Rower.objects.create(
user=self.u, gdproptin=True, gdproptindate=timezone.now())
nu = datetime.datetime.now()
self.w = Workout.objects.create(
name='testworkout',workouttype='On-water',
user=self.r,date=nu.strftime('%Y-%m-%d'),
starttime=nu.strftime('%H:%M:%S'),
duration="0:55:00",distance=8000)
self.w2 = Workout.objects.create(
name='testworkout 2',workouttype='On-water',
user=self.r,date=nu.strftime('%Y-%m-%d'),
starttime=nu.strftime('%H:%M:%S'),
duration="0:55:00",distance=8000)
self.w = Workout.objects.create(
name='testworkout', workouttype='On-water',
user=self.r, date=nu.strftime('%Y-%m-%d'),
starttime=nu.strftime('%H:%M:%S'),
duration="0:55:00", distance=8000)
self.w2 = Workout.objects.create(
name='testworkout 2', workouttype='On-water',
user=self.r, date=nu.strftime('%Y-%m-%d'),
starttime=nu.strftime('%H:%M:%S'),
duration="0:55:00", distance=8000)
if self.client.login(
username="superuser1", password="pwd"):
if VERBOSE:
@@ -73,16 +75,15 @@ class TraverseLinksTest(TestCase):
# Initialise your database here as needed
pass
def test_traverse_urls(self):
# Fill these lists as needed with your site specific URLs to check and to avoid
to_traverse_list = ['/rowers/list-workouts']
to_avoid_list = ['^/$', '^$', 'javascript:history\.back()',
'javascript:history\.go\(-1\)', '^mailto:.*',
'.*github\.io.*', 'javascript:.*',
'.*biorow\.com.*','.*facebook.*',
'.*wordpress.*','.*analytics.*','.*freenet.*',
'.*twitter.*','^blog.*',
'.*biorow\.com.*', '.*facebook.*',
'.*wordpress.*', '.*analytics.*', '.*freenet.*',
'.*twitter.*', '^blog.*',
'.*\d+-\d+-\d+.*',
'.*flexchart/.*',
'.*heroku.*',
@@ -104,7 +105,7 @@ class TraverseLinksTest(TestCase):
'.*performancephones.*',
'.*sporttracks.*',
'.*join-select.*',
]
]
done_list = []
error_list = []
@@ -113,20 +114,26 @@ class TraverseLinksTest(TestCase):
source_of_link[link] = 'initial'
(to_traverse_list, to_avoid_list, done_list, error_list, source_of_link) = \
self.recurse_into_path(to_traverse_list, to_avoid_list, done_list, error_list, source_of_link)
self.recurse_into_path(
to_traverse_list, to_avoid_list, done_list, error_list, source_of_link)
print('END REACHED\nStats:')
if VERBOSE: print('\nto_traverse_list = ' + str(to_traverse_list))
if VERBOSE: print('\nto_avoid_list = ' + str(to_avoid_list))
if VERBOSE: print('\nsource_of_link = ' + str(source_of_link))
if VERBOSE: print('\ndone_list = ' + str(done_list))
if VERBOSE:
print('\nto_traverse_list = ' + str(to_traverse_list))
if VERBOSE:
print('\nto_avoid_list = ' + str(to_avoid_list))
if VERBOSE:
print('\nsource_of_link = ' + str(source_of_link))
if VERBOSE:
print('\ndone_list = ' + str(done_list))
print('Followed ' + str(len(done_list)) + ' links successfully')
print('Avoided ' + str(len(to_avoid_list)) + ' links')
if error_list:
print('!! ' + str(len(error_list)) + ' error(s) : ')
for error in error_list:
print(str(error) + ' found in page ' + source_of_link[error[0]])
print(str(error) + ' found in page ' +
source_of_link[error[0]])
print('Errors found traversing links')
assert False
@@ -142,7 +149,8 @@ class TraverseLinksTest(TestCase):
url = to_traverse_list.pop()
if not match_any(url, to_avoid_list):
print('Surfing to ' + str(url) + ', discovered in ' + str(source_of_link[url]))
print('Surfing to ' + str(url) +
', discovered in ' + str(source_of_link[url]))
response = self.client.get(url, follow=True)
if response.status_code == 200:
@@ -152,30 +160,39 @@ class TraverseLinksTest(TestCase):
for link in soup.find_all('a'):
new_link = link.get('href')
if VERBOSE: print(' Found link: ' + str(new_link))
if VERBOSE:
print(' Found link: ' + str(new_link))
if match_any(new_link, to_avoid_list):
if VERBOSE: print(' Avoiding it')
if VERBOSE:
print(' Avoiding it')
elif new_link in done_list:
if VERBOSE: print(' Already done, ignoring')
if VERBOSE:
print(' Already done, ignoring')
elif new_link in to_traverse_list:
if VERBOSE: print(' Already in to traverse list, ignoring')
if VERBOSE:
print(' Already in to traverse list, ignoring')
else:
if VERBOSE: print(' New, unknown link: Storing it to traverse later')
if VERBOSE:
print(
' New, unknown link: Storing it to traverse later')
source_of_link[new_link] = url
to_traverse_list.append(new_link)
done_list.append(url)
if VERBOSE: print('Done')
if VERBOSE:
print('Done')
else:
error_list.append((url, response.status_code))
to_avoid_list.append(url)
if VERBOSE: print('Diving into next level')
if VERBOSE:
print('Diving into next level')
return self.recurse_into_path(to_traverse_list, to_avoid_list, done_list, error_list, source_of_link)
else:
# Nothing to traverse
if VERBOSE: print('Returning to upper level')
if VERBOSE:
print('Returning to upper level')
return to_traverse_list, to_avoid_list, done_list, error_list, source_of_link