go
This commit is contained in:
73
go/all_urls.txt
Normal file
73
go/all_urls.txt
Normal file
@ -0,0 +1,73 @@
|
||||
https://blog.highp.ing/#适当的空格与大写
|
||||
https://blog.highp.ing/#标点符号
|
||||
https://blog.highp.ing/#小结
|
||||
https://cdn.jsdelivr.net/npm/photoswipe@4.1.3/dist/photoswipe.min.css
|
||||
https://blog.highp.ing/safari-pinned-tab.svg
|
||||
https://blog.highp.ing/
|
||||
https://blog.highp.ing/index.xml
|
||||
https://blog.highp.ing/p/newlogo/HighPingNetwork.d2f77b5eeb3dfc00db07dda5bc2aeca7_hu49df1706fbb734a84765f7148703f781_33088_250x150_fill_box_smart1_3.png
|
||||
https://blog.highp.ing/p/dn42/
|
||||
https://blog.highp.ing/site.webmanifest
|
||||
https://github.com/High-Ping-Network
|
||||
https://blog.highp.ing/#列表
|
||||
https://blog.highp.ing/p/writemarkdown/image-6.png
|
||||
https://blog.highp.ing/HighPingBlog.png
|
||||
https://blog.highp.ing/archives/
|
||||
https://blog.highp.ing/p/writemarkdown/image.png
|
||||
https://blog.highp.ing/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js
|
||||
https://blog.highp.ing/p/writemarkdown/
|
||||
https://blog.highp.ing/p/writemarkdown/image-10.png
|
||||
https://blog.highp.ing/p/writemarkdown/image-12.png
|
||||
https://blog.highp.ing/p/writemarkdown/image-2.png
|
||||
https://blog.highp.ing/p/newlogo/
|
||||
https://blog.highp.ing/favicon-16x16.png
|
||||
https://highp.ing
|
||||
https://blog.highp.ing/categories/main/
|
||||
https://blog.highp.ing/#图片的用法
|
||||
https://blog.highp.ing/p/writemarkdown/image-9.png
|
||||
https://github.com/CaiJimmy/hugo-theme-stack
|
||||
https://blog.highp.ing/p/yidongyunshouji/
|
||||
https://blog.highp.ing/p/jianshiweb/_hu37034c753bbeaf0bdc92aa24aad0f5f3_25914_03d51669136658de002d17b3b77401b4.png
|
||||
https://jimmycai.com
|
||||
https://cdn.jsdelivr.net/npm/node-vibrant@3.1.6/dist/vibrant.min.js
|
||||
https://blog.highp.ing/%E5%85%B3%E4%BA%8E/
|
||||
https://blog.highp.ing/p/writemarkdown/image-1.png
|
||||
https://blog.highp.ing/p/writemarkdown/image-13.png
|
||||
https://blog.highp.ing/#需要高亮的地方
|
||||
https://blog.highp.ing/#无需高亮的地方
|
||||
https://blog.highp.ing/p/writemarkdown/image-18.png
|
||||
https://blog.highp.ing/p/writemarkdown/image-16_hucaed53c3386eabc3ed1d9c996cb20685_207073_800x0_resize_box_3.png
|
||||
https://blog.highp.ing/p/writemarkdown/image-14.png
|
||||
https://blog.highp.ing/p/chinamobilegamebooster/
|
||||
https://gohugo.io/
|
||||
https://cdn.jsdelivr.net/npm/photoswipe@4.1.3/dist/photoswipe.min.js
|
||||
https://blog.highp.ing/scss/style.min.abbd69b2908fdfcd5179898beaafd374514a86538d81639ddd2c58c06ae54e40.css
|
||||
https://blog.highp.ing/favicon-32x32.png
|
||||
https://blog.highp.ing/links/
|
||||
https://markdown.com.cn/basic-syntax/
|
||||
https://blog.highp.ing/p/writemarkdown/image-5.png
|
||||
https://blog.highp.ing/ts/main.js
|
||||
https://cdn.jsdelivr.net/npm/photoswipe@4.1.3/dist/default-skin/default-skin.min.css
|
||||
https://blog.highp.ing
|
||||
https://blog.highp.ing/p/writemarkdown/image-4.png
|
||||
https://blog.highp.ing/p/dn42/dn42.ab0e3124c98adc226fb768036b6c37eb_hu2aff6188372466bf0fcde6ee8756fedf_2314_250x150_fill_box_smart1_3.png
|
||||
https://blog.highp.ing/#适当的高亮
|
||||
https://blog.highp.ing/p/writemarkdown/image-11.png
|
||||
https://cdn.jsdelivr.net/npm/photoswipe@4.1.3/dist/photoswipe-ui-default.min.js
|
||||
https://blog.highp.ing/http:/t.me/HighPingNetwork
|
||||
https://blog.highp.ing/#关于-markdown
|
||||
https://blog.highp.ing/p/writemarkdown/image-15.png
|
||||
https://blog.highp.ing/p/yidongyunshouji/61749296_p0.7e13bd353bfedd6f3da607f8ccbccb24_hu26182db6e8492496d9cea804dfb078ec_209927_250x150_fill_q75_box_smart1.jpg
|
||||
https://blog.highp.ing/apple-touch-icon.png
|
||||
https://blog.highp.ing/#标题的递进
|
||||
https://blog.highp.ing/#无需出现的标点符号
|
||||
https://blog.highp.ing/#规范
|
||||
https://blog.highp.ing/#英文标点符号
|
||||
https://blog.highp.ing/p/writemarkdown/image-8.png
|
||||
https://blog.highp.ing/p/writemarkdown/image-7.png
|
||||
https://blog.highp.ing/p/writemarkdown/image-17.png
|
||||
https://blog.highp.ing/p/chinamobilegamebooster/img234.677508fc82cee96ddc407757e779244d_hue6d9a042141c193bafc41607b99de5f3_165854_250x150_fill_q75_h2_box_smart1_2.webp
|
||||
https://blog.highp.ing/p/jianshiweb/
|
||||
https://blog.highp.ing/search/
|
||||
https://blog.highp.ing/#引用
|
||||
https://blog.highp.ing/p/writemarkdown/image-3.png
|
5
go/go.mod
Normal file
5
go/go.mod
Normal file
@ -0,0 +1,5 @@
|
||||
module main
|
||||
|
||||
go 1.21.6
|
||||
|
||||
require golang.org/x/net v0.21.0
|
2
go/go.sum
Normal file
2
go/go.sum
Normal file
@ -0,0 +1,2 @@
|
||||
golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
|
||||
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
57
go/http.go
Normal file
57
go/http.go
Normal file
@ -0,0 +1,57 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Package-level state shared by main and worker.
var (
	// maxWorkers is the number of worker goroutines that main starts; main
	// also uses it as the capacity of the URL channel.
	maxWorkers = 10

	// wg tracks URLs that have been queued but not yet processed: main adds
	// one per URL it sends and worker marks one done per URL it receives.
	wg sync.WaitGroup
)
|
||||
|
||||
func worker(in <-chan string) {
|
||||
for url := range in {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to request %s\n", url)
|
||||
} else {
|
||||
defer resp.Body.Close()
|
||||
fmt.Printf("URL %s has been successfully requested\n", url)
|
||||
}
|
||||
wg.Done()
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
file, err := os.Open("url.txt")
|
||||
if err != nil {
|
||||
fmt.Println("Failed to open file:", err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// 创建一个带缓冲的通道
|
||||
urls := make(chan string, maxWorkers)
|
||||
|
||||
// 启动goroutines处理请求
|
||||
for i := 0; i < maxWorkers; i++ {
|
||||
go worker(urls)
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
|
||||
// 将URL发送给 goroutines 处理
|
||||
for scanner.Scan() {
|
||||
url := scanner.Text()
|
||||
wg.Add(1)
|
||||
urls <- url
|
||||
}
|
||||
|
||||
close(urls)
|
||||
|
||||
wg.Wait()
|
||||
}
|
86
go/input_urls.txt
Normal file
86
go/input_urls.txt
Normal file
@ -0,0 +1,86 @@
|
||||
https://highp.ing
|
||||
https://blogcdn.blog.highp.ing/p/2023zongjie
|
||||
https://blogcdn.blog.highp.ing/p/GreatMusic
|
||||
https://blogcdn.blog.highp.ing/p/ceping-nomaifreehost
|
||||
https://blogcdn.blog.highp.ing/p/cf302
|
||||
https://blogcdn.blog.highp.ing/p/dns
|
||||
https://blogcdn.blog.highp.ing/p/jianshiweb
|
||||
https://blogcdn.blog.highp.ing/p/nodepanels
|
||||
https://blogcdn.blog.highp.ing/p/samopenai
|
||||
https://blogcdn.blog.highp.ing/p/xui-scanner
|
||||
https://blogcdn.blog.highp.ing/p/2024new
|
||||
https://blogcdn.blog.highp.ing/p/Rclone-Fuse
|
||||
https://blogcdn.blog.highp.ing/p/ceping-nomaius2
|
||||
https://blogcdn.blog.highp.ing/p/chinamobilegamebooster
|
||||
https://blogcdn.blog.highp.ing/p/fulltclash
|
||||
https://blogcdn.blog.highp.ing/p/magisk-delta
|
||||
https://blogcdn.blog.highp.ing/p/ourasn
|
||||
https://blogcdn.blog.highp.ing/p/screen
|
||||
https://blogcdn.blog.highp.ing/p/yidongyunshouji
|
||||
https://blogcdn.blog.highp.ing/p/Alist
|
||||
https://blogcdn.blog.highp.ing/p/ask-for-help
|
||||
https://blogcdn.blog.highp.ing/p/ceping-poloukv6
|
||||
https://blogcdn.blog.highp.ing/p/cloudflare-r2
|
||||
https://blogcdn.blog.highp.ing/p/github-student-pack
|
||||
https://blogcdn.blog.highp.ing/p/musicbot
|
||||
https://blogcdn.blog.highp.ing/p/pgptogithub
|
||||
https://blogcdn.blog.highp.ing/p/sqaipad
|
||||
https://blogcdn.blog.highp.ing/p/AndroidInstallGoogle
|
||||
https://blogcdn.blog.highp.ing/p/blogspeedup
|
||||
https://blogcdn.blog.highp.ing/p/ceping-rackdog
|
||||
https://blogcdn.blog.highp.ing/p/cloudflare-tunnel
|
||||
https://blogcdn.blog.highp.ing/p/googlepaiming
|
||||
https://blogcdn.blog.highp.ing/p/navidrome
|
||||
https://blogcdn.blog.highp.ing/p/qqpupil
|
||||
https://blogcdn.blog.highp.ing/p/vps-openvpn
|
||||
https://blogcdn.blog.highp.ing/p/FreeLinkDomain
|
||||
https://blogcdn.blog.highp.ing/p/ceping-499hkv6
|
||||
https://blogcdn.blog.highp.ing/p/ceping-rackdogau
|
||||
https://blogcdn.blog.highp.ing/p/dn42
|
||||
https://blogcdn.blog.highp.ing/p/homecloud
|
||||
https://blogcdn.blog.highp.ing/p/newlogo
|
||||
https://blogcdn.blog.highp.ing/p/ruipingcloudflare
|
||||
https://blogcdn.blog.highp.ing/p/writemarkdown
|
||||
https://blog.highp.ing
|
||||
https://blog.highp.ing/p/2023zongjie
|
||||
https://blog.highp.ing/p/GreatMusic
|
||||
https://blog.highp.ing/p/ceping-nomaifreehost
|
||||
https://blog.highp.ing/p/cf302
|
||||
https://blog.highp.ing/p/dns
|
||||
https://blog.highp.ing/p/jianshiweb
|
||||
https://blog.highp.ing/p/nodepanels
|
||||
https://blog.highp.ing/p/samopenai
|
||||
https://blog.highp.ing/p/xui-scanner
|
||||
https://blog.highp.ing/p/2024new
|
||||
https://blog.highp.ing/p/Rclone-Fuse
|
||||
https://blog.highp.ing/p/ceping-nomaius2
|
||||
https://blog.highp.ing/p/chinamobilegamebooster
|
||||
https://blog.highp.ing/p/fulltclash
|
||||
https://blog.highp.ing/p/magisk-delta
|
||||
https://blog.highp.ing/p/ourasn
|
||||
https://blog.highp.ing/p/screen
|
||||
https://blog.highp.ing/p/yidongyunshouji
|
||||
https://blog.highp.ing/p/Alist
|
||||
https://blog.highp.ing/p/ask-for-help
|
||||
https://blog.highp.ing/p/ceping-poloukv6
|
||||
https://blog.highp.ing/p/cloudflare-r2
|
||||
https://blog.highp.ing/p/github-student-pack
|
||||
https://blog.highp.ing/p/musicbot
|
||||
https://blog.highp.ing/p/pgptogithub
|
||||
https://blog.highp.ing/p/sqaipad
|
||||
https://blog.highp.ing/p/AndroidInstallGoogle
|
||||
https://blog.highp.ing/p/blogspeedup
|
||||
https://blog.highp.ing/p/ceping-rackdog
|
||||
https://blog.highp.ing/p/cloudflare-tunnel
|
||||
https://blog.highp.ing/p/googlepaiming
|
||||
https://blog.highp.ing/p/navidrome
|
||||
https://blog.highp.ing/p/qqpupil
|
||||
https://blog.highp.ing/p/vps-openvpn
|
||||
https://blog.highp.ing/p/FreeLinkDomain
|
||||
https://blog.highp.ing/p/ceping-499hkv6
|
||||
https://blog.highp.ing/p/ceping-rackdogau
|
||||
https://blog.highp.ing/p/dn42
|
||||
https://blog.highp.ing/p/homecloud
|
||||
https://blog.highp.ing/p/newlogo
|
||||
https://blog.highp.ing/p/ruipingcloudflare
|
||||
https://blog.highp.ing/p/writemarkdown
|
104
go/main.go
Normal file
104
go/main.go
Normal file
@ -0,0 +1,104 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
func main() {
|
||||
file, err := os.Open("input_urls.txt")
|
||||
if err != nil {
|
||||
fmt.Println("无法打开文件:", err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
inputURL := scanner.Text()
|
||||
urls := crawlAllURLs(inputURL)
|
||||
|
||||
file, err := os.Create("all_urls.txt")
|
||||
if err != nil {
|
||||
fmt.Println("无法创建文件:", err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
for _, url := range urls {
|
||||
fmt.Println(url)
|
||||
fmt.Fprintf(file, "%s\n", url)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func crawlAllURLs(url string) []string {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
fmt.Println("无法获取网页:", err)
|
||||
return []string{}
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
baseURL := getBaseURL(url)
|
||||
urls := make(map[string]struct{})
|
||||
tokenizer := html.NewTokenizer(resp.Body)
|
||||
|
||||
for {
|
||||
tokenType := tokenizer.Next()
|
||||
if tokenType == html.ErrorToken {
|
||||
break
|
||||
}
|
||||
|
||||
token := tokenizer.Token()
|
||||
|
||||
switch token.Type {
|
||||
case html.StartTagToken, html.SelfClosingTagToken:
|
||||
switch token.Data {
|
||||
case "a":
|
||||
for _, attr := range token.Attr {
|
||||
if attr.Key == "href" {
|
||||
url := resolveURL(baseURL, attr.Val)
|
||||
urls[url] = struct{}{}
|
||||
}
|
||||
}
|
||||
case "img", "link", "script":
|
||||
for _, attr := range token.Attr {
|
||||
if attr.Key == "src" || attr.Key == "href" {
|
||||
url := resolveURL(baseURL, attr.Val)
|
||||
urls[url] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
allURLs := []string{}
|
||||
for url := range urls {
|
||||
allURLs = append(allURLs, url)
|
||||
}
|
||||
|
||||
return allURLs
|
||||
}
|
||||
|
||||
// getBaseURL returns the "scheme://host" prefix of url, dropping any path,
// query string and fragment. For example "https://example.com/a/b?c#d"
// yields "https://example.com".
//
// Unlike indexing into the result of a split on "/", this never panics: an
// input without "://" is returned with only its path, query and fragment
// removed, and an empty input yields an empty string.
func getBaseURL(url string) string {
	scheme, rest, hasScheme := strings.Cut(url, "://")
	if !hasScheme {
		rest = url
	}

	// The authority ends at the first path, query or fragment delimiter.
	if end := strings.IndexAny(rest, "/?#"); end >= 0 {
		rest = rest[:end]
	}

	if !hasScheme {
		return rest
	}
	return scheme + "://" + rest
}
|
||||
|
||||
// resolveURL turns a link found in a page into an absolute URL.
//
//   - A protocol-relative link ("//host/path") inherits the scheme of
//     baseURL, falling back to https when baseURL has none.
//   - A link that already carries a scheme (http:, https:, mailto:,
//     javascript:, data:, ...) is returned unchanged.
//   - A root-relative link ("/path") is appended to baseURL.
//   - Anything else is appended to baseURL after a "/".
//
// baseURL is expected to be of the form "scheme://host" without a trailing
// slash. Relative links are therefore always resolved against the site
// root, not against the directory of the page they were found on.
func resolveURL(baseURL, url string) string {
	switch {
	case strings.HasPrefix(url, "//"):
		scheme, _, ok := strings.Cut(baseURL, "://")
		if !ok || scheme == "" {
			scheme = "https"
		}
		return scheme + ":" + url
	case hasScheme(url):
		return url
	case strings.HasPrefix(url, "/"):
		return baseURL + url
	default:
		return baseURL + "/" + url
	}
}

// hasScheme reports whether ref starts with a URI scheme: a letter followed
// by any number of letters, digits, "+", "-" or ".", terminated by ":".
func hasScheme(ref string) bool {
	for i := 0; i < len(ref); i++ {
		c := ref[i]
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
			// Letters are valid anywhere in a scheme.
		case '0' <= c && c <= '9', c == '+', c == '-', c == '.':
			// These may not start a scheme.
			if i == 0 {
				return false
			}
		case c == ':':
			return i > 0
		default:
			return false
		}
	}
	return false
}
|
4612
go/url.txt
Normal file
4612
go/url.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user