Merge pull request #25 from paceaux/develop

Develop
paceaux · Sep 27, 2023 · 8065a31 · 8065a31
2 parents e4b52f7 + 141688f
commit 8065a31
Show file tree

Hide file tree

Showing 30 changed files with 847 additions and 687 deletions.
diff --git a/.eslintrc b/.eslintrc
@@ -1,8 +1,7 @@
 {
     "env": {
         "browser": false,
-        "commonjs": true,
-        "es2021": true
+        "es2022": true
     },
     "extends": "airbnb-base",
     "parserOptions": {
@@ -17,5 +16,6 @@
         "afterAll": true
     },
     "rules": {
+        "import/extensions": "off"
     }
 }
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -1,9 +1,9 @@
 name: CI
 on:
   push:
-    branches: [master]
+    branches: [master, develop]
   pull_request:
-    branches: [master]
+    branches: [master, develop]
 jobs:
   run-ci:
     runs-on: ubuntu-latest

diff --git a/README.md b/README.md
@@ -1,59 +1,65 @@
 # Selector Hound: sniff out CSS Selectors on a site
 
-`SelectorHound` lets you find CSS selectors on a public or local site. Rename, refactor, and delete unused CSS with a bloodhound on your side. 
+`SelectorHound` lets you find CSS selectors on a public or local site. Rename, refactor, and delete unused CSS with a bloodhound on your side.
 
-A sitemap or a site URL is enough to get started. Provide a single CSS selector, a comma separated string of selectors, or even a stylesheet. 
+A sitemap or a site URL is enough to get started. Provide a single CSS selector, a comma separated string of selectors, or even a stylesheet.
 
-Pages with _zero_ matches aren't put in the results. Pages with at least *one* match are in the result, and you find out which CSS selectors _aren't_ used. (That's right, it's a selector _finder_ **and** ... "not founder").
+Pages with _zero_ matches aren't put in the results. Pages with at least _one_ match are in the result, and you find out which CSS selectors _aren't_ used. (That's right, it's a selector _finder_ **and** ... "not founder").
 
 Optionally take a screenshot of the elements (though it may hurt performance).
 
-Pages not showing up that should? Check the `log.txt` for any issues. 
+Pages not showing up that should? Check the `log.txt` for any issues.
 
 ## Installation
 
 ### Prerequisites
-- Node LTS (as of September 2022, Node 16.17.0)
+
+- Node LTS (as of September 2023, Node 18.16.0)
 
 - If you want to use the `-d` or `-c` options (`--isSpa` and `--takeScreenshots`), this requires Puppeteer, which in turn requires Chromium.  
 
 - If running this on a Mac, be sure you install chromium without a quarantine flag: `brew install chromium --no-quarantine`
 
-### Running on-demand:
+### Running on-demand
+
 Download this package. Then run
 
-```console
+```shell
 npm install
 ```
 
 ### Globally via NPM
 
-```console
+```shell
 npm i -g selector-hound
 ```
 
 ## Usage
 
 ### Basic Scanning
+
 Only scan the first 20 pages for `.yourthing`
 
-```
+```shell
 SelectorHound --sitemap=https://wherever.com/xml --limit=20 --selector=".yourthing"
 SelectorHound -u https://wherever.com/xml -l 20 -s ".yourthing"
 ```
 
 ### Re-using, regenerating, and providing a list of links
-Before the site scanning begins, this generates a `<site>.sitemap.json` file containing all of the links it will scan. This file is generated from the `sitemap.xml` file you provided **or** from crawling the site looking for links. To improve performance, SelectorHound will look for this file _first_ before attempting to retrieve/generate a sitemap. 
+
+Before the site scanning begins, this generates a `<site>.sitemap.json` file containing all of the links it will scan. This file is generated from the `sitemap.xml` file you provided **or** from crawling the site looking for links. To improve performance, SelectorHound will look for this file _first_ before attempting to retrieve/generate a sitemap.
 
 If you want to re-generate this `<site>.sitemap.json` file, you can force it:
 
-```
+```shell
 SelectorHound --sitemap=https://wherever.com/xml  --selector=".yourthing" --dontUseExportedSitemap
 SelectorHound -u https://mysite.com/landing -r -s '.yourThing' -X
 ```
 
 #### Formatting
+
 By default, SelectorHound will generate a format that's based off of how sitemap XML looks, which is an array of objects with a `loc` property:
+
 ```JavaScript
 [
     {
@@ -66,29 +72,32 @@ By default, SelectorHound will generate a format that's based off of how sitemap
 ```
 
 However, you can also provide your own list of links as just an array of strings:
+
 ```JavaScript
     [
         "https://mysite.com/path",
         "https://mysite.com/another"
     ]
 ```
 
-
-
 ### Crawling instead of using a sitemap
+
 Crawl the site, starting from a landing page.
 
 ```shell
 SelectorHound -u https://mysite.com/landing -r -s ".myClass"
 ```
 
 ### Taking Screenshots or dealing with SPAs
+
 Scan the first 20 pages and take screenshots
+
 ```shell
 SelectorHound -u https://wherever.com/xml -l 20 -s ".yourthing" -c
 ```
 
 Scan those pages, but treat them like Single Page Applications (`-d`), and search for all the selectors in `mystyles.css`
+
 ```shell
 SelectorHound -u https://wherever.com/xml -f "mystyles.css" -d
 
@@ -105,10 +114,7 @@ SelectorHound -u https://wherever.com/xml -f "mystyles.css" -d
 | `--cssFile` | `-f`  | A CSS file to use instead of a single selector. Optional. |   |
 | `--isSpa`| `-d`  | Uses Puppeteer instead of Cheerio (in case some content is dynamic). Optional. | `false`|
 | `--takeScreenshots`| `-c`  | Takes screenshots with Puppeteer. Optional. | `false` |
-| `--outputFileName` | `-o` | A provided value will be prepended to `pages.json` and will be output in your current directory. Optional. | `pages.json` | 
+| `--outputFileName` | `-o` | A provided value will be prepended to `pages.json` and will be output in your current directory. Optional. | `pages.json` |
 | `--showElementDetails` | `-e`  | Show details for elements that match the result (`tag`, `innerText`, `attributes`). Optional. |   `false` |
 | `--showHtml` | `-m` | Shows HTML of the elements that match the result. Optional. | `true` |
 | `--crawl`| `-r` | Crawls the site instead of using a sitemap. Outputs a file called `<sitename>.sitemap.json`. Optional.  | `false` |
-
-
-
diff --git a/cli.js b/cli.js
@@ -1,9 +1,9 @@
 #!/usr/bin/env node
 
-const yargs = require('yargs/yargs');
-const { hideBin } = require('yargs/helpers');
+import yargs from 'yargs/yargs';
+import { hideBin } from 'yargs/helpers';
 
-const {
+import {
   LOG_FILE_NAME,
   DEFAULT_SITEMAP_URL,
   DEFAULT_SELECTOR,
@@ -14,12 +14,13 @@ const {
   DEFAULT_SHOW_ELEMENT_DETAILS,
   DEFAULT_SHOW_HTML,
   DEFAULT_CRAWL,
-} = require('./src/constants');
-const SelectorFinder = require('./src/selector-finder');
-const SiteCrawler = require('./src/site-crawler');
-const Outputter = require('./src/outputter');
-const Log = require('./src/logger');
-const CSSReader = require('./src/css-reader');
+} from './src/constants.js';
+
+import SelectorFinder from './src/selector-finder.js';
+import SiteCrawler from './src/site-crawler.js';
+import Outputter from './src/outputter.js';
+import Log from './src/logger.js';
+import CSSReader from './src/css-reader.js';
 
 const log = new Log(LOG_FILE_NAME);
 

diff --git a/jest.config.js b/jest.config.js
@@ -3,7 +3,7 @@
  * https://jestjs.io/docs/en/configuration.html
  */
 
-module.exports = {
+export default {
 
   // All imported modules in your tests should be mocked automatically
   // automock: false,
@@ -56,7 +56,7 @@ module.exports = {
   // Force coverage collection from ignored files using an array of glob patterns
   // forceCoverageMatch: [],
 
-  // A path to a module which exports an async function that is triggered once before 
+  // A path to a module which exports an async function that is triggered once before
   // all test suites
   globalSetup: './test/config/setup.js',
 
@@ -140,15 +140,15 @@ module.exports = {
   // set up the testing framework before each test
   // setupFilesAfterEnv: [],
 
-  // The number of seconds after which a test is considered as slow 
+  // The number of seconds after which a test is considered as slow
   // and reported as such in the results.
   // slowTestThreshold: 5,
 
   // A list of paths to snapshot serializer modules Jest should use for snapshot testing
   // snapshotSerializers: [],
 
   // The test environment that will be used for testing
-  testEnvironment: './test/config/puppeteer_environment.js',
+  // testEnvironment: './test/config/puppeteer_environment.js',
 
   // Options that will be passed to the testEnvironment
   // testEnvironmentOptions: {},
@@ -185,7 +185,7 @@ module.exports = {
   // timers: "real",
 
   // A map from regular expressions to paths to transformers
-  // transform: undefined,
+  transform: {},
 
   // An array of regexp pattern strings that are matched against all source file paths,
   // matched files will skip transformation