Update crawl.json
This commit is contained in:
@@ -117,21 +117,11 @@
|
|||||||
],
|
],
|
||||||
"notes": "This one should not go backwards, but it does!"
|
"notes": "This one should not go backwards, but it does!"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://news.ycombinator.com/",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://www.vellum.ai/llm-leaderboard",
|
"website": "https://www.vellum.ai/llm-leaderboard",
|
||||||
"expected_min_num_of_pages": 0,
|
"expected_min_num_of_pages": 0,
|
||||||
"expected_crawled_pages": [""]
|
"expected_crawled_pages": [""]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://www.bigbadtoystore.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://www.instructables.com",
|
"website": "https://www.instructables.com",
|
||||||
"expected_min_num_of_pages": 78,
|
"expected_min_num_of_pages": 78,
|
||||||
@@ -150,31 +140,6 @@
|
|||||||
"https://www.instructables.com/circuits/clocks/projects/"
|
"https://www.instructables.com/circuits/clocks/projects/"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://www.powells.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://www.royalacademy.org.uk",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://www.eastbaytimes.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://www.manchestereveningnews.co.uk",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://physicsworld.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://richmondconfidential.org",
|
"website": "https://richmondconfidential.org",
|
||||||
"expected_min_num_of_pages": 50,
|
"expected_min_num_of_pages": 50,
|
||||||
@@ -191,12 +156,6 @@
|
|||||||
"https://richmondconfidential.org/2009/10/13/family-calls-for-end-to-violence-at-memorial-for-slain-woman-friend/"
|
"https://richmondconfidential.org/2009/10/13/family-calls-for-end-to-violence-at-memorial-for-slain-woman-friend/"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://www.techinasia.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""],
|
|
||||||
"notes": "The website has a paywall and bot detectors."
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://www.boardgamegeek.com",
|
"website": "https://www.boardgamegeek.com",
|
||||||
"expected_min_num_of_pages": 15,
|
"expected_min_num_of_pages": 15,
|
||||||
@@ -217,10 +176,5 @@
|
|||||||
"https://www.boardgamegeek.com/previews",
|
"https://www.boardgamegeek.com/previews",
|
||||||
"https://www.boardgamegeek.com/browse/boardgame"
|
"https://www.boardgamegeek.com/browse/boardgame"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://www.mountainproject.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user