189 lines
7.3 KiB
Python
189 lines
7.3 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
from __future__ import print_function
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
from django.test import TestCase, Client,override_settings
|
|
from django.core.management import call_command
|
|
from django.utils.six import StringIO
|
|
from django.test.client import RequestFactory
|
|
from .views import c2_open
|
|
from rowers.models import Workout, User, Rower, WorkoutForm,RowerForm,GraphImage
|
|
from rowers.forms import DocumentsForm,CNsummaryForm,RegistrationFormUniqueEmail
|
|
import rowers.plots as plots
|
|
import rowers.interactiveplots as iplots
|
|
import datetime
|
|
from rowingdata import rowingdata as rdata
|
|
from rowingdata import rower as rrower
|
|
from django.utils import timezone
|
|
from rowers.rows import handle_uploaded_file
|
|
from django.core.files.uploadedfile import SimpleUploadedFile
|
|
from time import strftime,strptime,mktime,time,daylight
|
|
import os
|
|
from rowers.tasks import handle_makeplot
|
|
from rowers.utils import serialize_list,deserialize_list
|
|
|
|
from shutil import copyfile
|
|
|
|
from minimocktest import MockTestCase
|
|
import pandas as pd
|
|
|
|
import json
|
|
import numpy as np
|
|
|
|
from rowers import urls
|
|
from rowers.views import error500_view,error404_view,error400_view,error403_view
|
|
|
|
from dataprep import delete_strokedata
|
|
|
|
from redis import StrictRedis
|
|
# Module-level Redis client built with StrictRedis' default connection
# arguments; it is instantiated as a side effect of importing this module.
# NOTE(review): nothing in the visible part of this file uses it - confirm
# whether it is still needed.
redis_connection = StrictRedis()
|
|
|
|
# When True, the link-traversal test prints per-link progress and dumps its
# bookkeeping lists in addition to the always-printed summary lines.
VERBOSE = True
|
|
|
|
class TraverseLinksTest(TestCase):
    """Crawl the site as a logged-in superuser and fail on any broken link.

    Starting from a seed list of URLs, every ``<a href>`` found on a page
    that answers with HTTP 200 is queued and visited in turn, unless it
    matches one of the "avoid" regular expressions.  Any other final status
    code is recorded as an error and makes the test fail.
    """

    def setUp(self):
        """Create a superuser, its rower profile and two workouts; log in.

        The test fails immediately if the test client cannot log in.
        """
        self.u = User.objects.create_superuser(
            'superuser1',
            'superuser1@example.com', 'pwd')
        self.r = Rower.objects.create(
            user=self.u, gdproptin=True, gdproptindate=timezone.now())
        nu = datetime.datetime.now()

        self.w = Workout.objects.create(
            name='testworkout', workouttype='On-water',
            user=self.r, date=nu.strftime('%Y-%m-%d'),
            starttime=nu.strftime('%H:%M:%S'),
            duration="0:55:00", distance=8000)
        self.w2 = Workout.objects.create(
            name='testworkout 2', workouttype='On-water',
            user=self.r, date=nu.strftime('%Y-%m-%d'),
            starttime=nu.strftime('%H:%M:%S'),
            duration="0:55:00", distance=8000)

        # Report a failed login through the test framework instead of
        # raising a bare BaseException.
        if not self.client.login(username="superuser1", password="pwd"):
            self.fail('Login failed')
        if VERBOSE:
            print('\nLogin as superuser OK')

    @classmethod
    def setUpTestData(cls):
        """Hook for class-level fixtures; intentionally empty."""
        # Initialise your database here as needed
        pass

    def test_traverse_urls(self):
        """Follow every reachable link from the seed URLs; fail on errors."""
        # Fill these lists as needed with your site specific URLs to check
        # and to avoid.
        to_traverse_list = ['/rowers/list-workouts']
        # Raw strings so that regex escapes such as \. and \d are not
        # (invalid) string escapes.  Patterns are applied with re.match,
        # i.e. anchored at the start of the link.
        to_avoid_list = [
            r'^/$', r'^$', r'javascript:history\.back\(\)',
            r'javascript:history\.go\(-1\)', r'^mailto:.*',
            r'.*github\.io.*', r'javascript:.*',
            r'.*biorow\.com.*', r'.*facebook.*',
            r'.*wordpress.*', r'.*analytics.*', r'.*freenet.*',
            r'.*twitter.*', r'^blog.*',
            r'.*\d+-\d+-\d+.*',
            r'.*flexchart/.*',
            r'.*heroku.*',
            r'.*oauth.*',
            r'.*rowingdata.*',
            r'.*thisisant.*',
            r'.*garmin.*',
            r'.*sub7.*',
            r'.*bitbucket.*',
            r'.*rathburn.*',
            r'.*team.*',
            r'.*concept2.*',
            r'.*static.*',
            r'.*authorize.*',
            r'.*youtu.*',
            r'.*earth.*',
            r'.*c2list.*',
            r'.*stravaimport.*',
            r'.*performancephones.*',
            r'.*sporttracks.*',
            r'.*join-select.*',
        ]

        done_list = []
        error_list = []
        # Maps each queued link to the page it was discovered on, so that a
        # broken link can be reported together with its origin.
        source_of_link = dict()
        for link in to_traverse_list:
            source_of_link[link] = 'initial'

        (to_traverse_list, to_avoid_list, done_list, error_list,
         source_of_link) = self.recurse_into_path(
            to_traverse_list, to_avoid_list, done_list, error_list,
            source_of_link)

        print('END REACHED\nStats:')
        if VERBOSE:
            print('\nto_traverse_list = ' + str(to_traverse_list))
            print('\nto_avoid_list = ' + str(to_avoid_list))
            print('\nsource_of_link = ' + str(source_of_link))
            print('\ndone_list = ' + str(done_list))
        print('Followed ' + str(len(done_list)) + ' links successfully')
        # to_avoid_list holds patterns (plus one entry per failed URL), not
        # the links that were skipped, so report it as such.
        print('Avoid list holds ' + str(len(to_avoid_list)) + ' pattern(s)')

        if not error_list:
            print('No errors')
            return

        print('!! ' + str(len(error_list)) + ' error(s) : ')
        for error in error_list:
            print(str(error) + ' found in page ' + source_of_link[error[0]])

        print('Errors found traversing links')
        # self.fail is used instead of "assert False", which would be
        # stripped when Python runs with -O.
        self.fail('Errors found traversing links: ' + str(error_list))

    def recurse_into_path(self, to_traverse_list, to_avoid_list, done_list, error_list, source_of_link):
        """Visit every queued URL, queueing newly discovered links on the way.

        Despite the historical name the crawl is iterative: one recursion
        level per visited URL would exceed the interpreter's recursion limit
        on sites with many links.  All list/dict arguments are mutated in
        place.

        Args:
            to_traverse_list: URLs still to visit; used as a stack (the most
                recently queued link is visited first).
            to_avoid_list: regular expressions for links that must not be
                followed.  URLs that return an error are added (escaped,
                exact match) so they are not fetched again.
            done_list: URLs that answered with HTTP 200.
            error_list: ``(url, status_code)`` tuples for every other answer.
            source_of_link: maps a link to the URL of the page it was found
                on.

        Returns:
            (to_traverse_list, to_avoid_list, done_list, error_list,
            source_of_link) - the same objects that were passed in.
        """
        while to_traverse_list:
            url = to_traverse_list.pop()

            if match_any(url, to_avoid_list):
                continue

            print('Surfing to ' + str(url) + ', discovered in ' + str(source_of_link[url]))
            response = self.client.get(url, follow=True)

            if response.status_code != 200:
                error_list.append((url, response.status_code))
                # The avoid list contains regular expressions, so the URL
                # must be escaped: characters such as "?" or "(" would
                # otherwise change the pattern's meaning (or make it
                # invalid) and the same broken URL could be fetched and
                # reported again.
                to_avoid_list.append(re.escape(url) + '$')
                continue

            soup = BeautifulSoup(response.content, 'html.parser')

            for link in soup.find_all('a'):
                new_link = link.get('href')
                if VERBOSE:
                    print(' Found link: ' + str(new_link))
                # match_any also returns True for a missing/empty href, so
                # anchors without a target are skipped here.
                if match_any(new_link, to_avoid_list):
                    if VERBOSE:
                        print(' Avoiding it')
                elif new_link in done_list:
                    if VERBOSE:
                        print(' Already done, ignoring')
                elif new_link in to_traverse_list:
                    if VERBOSE:
                        print(' Already in to traverse list, ignoring')
                else:
                    if VERBOSE:
                        print(' New, unknown link: Storing it to traverse later')
                    source_of_link[new_link] = url
                    to_traverse_list.append(new_link)

            done_list.append(url)
            if VERBOSE:
                print('Done')

        # Nothing to traverse
        if VERBOSE:
            print('Nothing left to traverse')
        return to_traverse_list, to_avoid_list, done_list, error_list, source_of_link
|
|
|
|
|
|
def match_any(my_string, regexp_list):
    """Tell whether ``my_string`` matches at least one of the given patterns.

    Each pattern is tried on its own with ``re.match`` (anchored at the start
    of the string).  Trying them separately, rather than joining them into a
    single alternation, keeps an empty pattern list from matching everything
    and lets individual patterns carry inline flags or group references.

    Args:
        my_string: the string to test; may be ``None`` or empty.
        regexp_list: iterable of regular-expression strings.

    Returns:
        bool: ``True`` if ``my_string`` is ``None``/empty or if any pattern
        matches, ``False`` otherwise (including for an empty pattern list).
    """
    if not my_string:
        # 'None' as string always matches
        return True
    return any(re.match(pattern, my_string) for pattern in regexp_list)
|