This commit is contained in:
2024-02-24 19:54:50 +08:00
parent b8b522c205
commit 84ee0032d7
9 changed files with 4939 additions and 0 deletions

73
go/all_urls.txt Normal file
View File

@ -0,0 +1,73 @@
https://blog.highp.ing/#适当的空格与大写
https://blog.highp.ing/#标点符号
https://blog.highp.ing/#小结
https://cdn.jsdelivr.net/npm/photoswipe@4.1.3/dist/photoswipe.min.css
https://blog.highp.ing/safari-pinned-tab.svg
https://blog.highp.ing/
https://blog.highp.ing/index.xml
https://blog.highp.ing/p/newlogo/HighPingNetwork.d2f77b5eeb3dfc00db07dda5bc2aeca7_hu49df1706fbb734a84765f7148703f781_33088_250x150_fill_box_smart1_3.png
https://blog.highp.ing/p/dn42/
https://blog.highp.ing/site.webmanifest
https://github.com/High-Ping-Network
https://blog.highp.ing/#列表
https://blog.highp.ing/p/writemarkdown/image-6.png
https://blog.highp.ing/HighPingBlog.png
https://blog.highp.ing/archives/
https://blog.highp.ing/p/writemarkdown/image.png
https://blog.highp.ing/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js
https://blog.highp.ing/p/writemarkdown/
https://blog.highp.ing/p/writemarkdown/image-10.png
https://blog.highp.ing/p/writemarkdown/image-12.png
https://blog.highp.ing/p/writemarkdown/image-2.png
https://blog.highp.ing/p/newlogo/
https://blog.highp.ing/favicon-16x16.png
https://highp.ing
https://blog.highp.ing/categories/main/
https://blog.highp.ing/#图片的用法
https://blog.highp.ing/p/writemarkdown/image-9.png
https://github.com/CaiJimmy/hugo-theme-stack
https://blog.highp.ing/p/yidongyunshouji/
https://blog.highp.ing/p/jianshiweb/_hu37034c753bbeaf0bdc92aa24aad0f5f3_25914_03d51669136658de002d17b3b77401b4.png
https://jimmycai.com
https://cdn.jsdelivr.net/npm/node-vibrant@3.1.6/dist/vibrant.min.js
https://blog.highp.ing/%E5%85%B3%E4%BA%8E/
https://blog.highp.ing/p/writemarkdown/image-1.png
https://blog.highp.ing/p/writemarkdown/image-13.png
https://blog.highp.ing/#需要高亮的地方
https://blog.highp.ing/#无需高亮的地方
https://blog.highp.ing/p/writemarkdown/image-18.png
https://blog.highp.ing/p/writemarkdown/image-16_hucaed53c3386eabc3ed1d9c996cb20685_207073_800x0_resize_box_3.png
https://blog.highp.ing/p/writemarkdown/image-14.png
https://blog.highp.ing/p/chinamobilegamebooster/
https://gohugo.io/
https://cdn.jsdelivr.net/npm/photoswipe@4.1.3/dist/photoswipe.min.js
https://blog.highp.ing/scss/style.min.abbd69b2908fdfcd5179898beaafd374514a86538d81639ddd2c58c06ae54e40.css
https://blog.highp.ing/favicon-32x32.png
https://blog.highp.ing/links/
https://markdown.com.cn/basic-syntax/
https://blog.highp.ing/p/writemarkdown/image-5.png
https://blog.highp.ing/ts/main.js
https://cdn.jsdelivr.net/npm/photoswipe@4.1.3/dist/default-skin/default-skin.min.css
https://blog.highp.ing
https://blog.highp.ing/p/writemarkdown/image-4.png
https://blog.highp.ing/p/dn42/dn42.ab0e3124c98adc226fb768036b6c37eb_hu2aff6188372466bf0fcde6ee8756fedf_2314_250x150_fill_box_smart1_3.png
https://blog.highp.ing/#适当的高亮
https://blog.highp.ing/p/writemarkdown/image-11.png
https://cdn.jsdelivr.net/npm/photoswipe@4.1.3/dist/photoswipe-ui-default.min.js
https://blog.highp.ing/http:/t.me/HighPingNetwork
https://blog.highp.ing/#关于-markdown
https://blog.highp.ing/p/writemarkdown/image-15.png
https://blog.highp.ing/p/yidongyunshouji/61749296_p0.7e13bd353bfedd6f3da607f8ccbccb24_hu26182db6e8492496d9cea804dfb078ec_209927_250x150_fill_q75_box_smart1.jpg
https://blog.highp.ing/apple-touch-icon.png
https://blog.highp.ing/#标题的递进
https://blog.highp.ing/#无需出现的标点符号
https://blog.highp.ing/#规范
https://blog.highp.ing/#英文标点符号
https://blog.highp.ing/p/writemarkdown/image-8.png
https://blog.highp.ing/p/writemarkdown/image-7.png
https://blog.highp.ing/p/writemarkdown/image-17.png
https://blog.highp.ing/p/chinamobilegamebooster/img234.677508fc82cee96ddc407757e779244d_hue6d9a042141c193bafc41607b99de5f3_165854_250x150_fill_q75_h2_box_smart1_2.webp
https://blog.highp.ing/p/jianshiweb/
https://blog.highp.ing/search/
https://blog.highp.ing/#引用
https://blog.highp.ing/p/writemarkdown/image-3.png

5
go/go.mod Normal file
View File

@ -0,0 +1,5 @@
module main
go 1.21.6
require golang.org/x/net v0.21.0

2
go/go.sum Normal file
View File

@ -0,0 +1,2 @@
golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=

BIN
go/http Executable file

Binary file not shown.

57
go/http.go Normal file
View File

@ -0,0 +1,57 @@
package main
import (
"bufio"
"fmt"
"net/http"
"os"
"sync"
)
var (
// maxWorkers is the number of worker goroutines that main starts; main also
// uses it as the buffer size of the URL channel.
maxWorkers = 10
// wg tracks URLs in flight: main calls Add(1) before sending each URL, worker
// calls Done after handling one, and main blocks on Wait before exiting.
wg sync.WaitGroup
)
// worker receives URLs from in until the channel is closed, issues an HTTP GET
// for each one and prints whether the request succeeded. It calls wg.Done once
// per URL received, so the sender must call wg.Add(1) before every send.
func worker(in <-chan string) {
	for url := range in {
		resp, err := http.Get(url)
		if err != nil {
			fmt.Printf("Failed to request %s\n", url)
		} else {
			// Close the body immediately instead of deferring: a defer inside
			// this loop would only run when the worker returns, which keeps
			// every response body (and its connection) open until the whole
			// URL list has been processed.
			resp.Body.Close()
			fmt.Printf("URL %s has been successfully requested\n", url)
		}
		wg.Done()
	}
}
// main reads URLs line by line from url.txt and requests each of them through
// a pool of maxWorkers goroutines, then waits until every queued URL has been
// handled. Blank lines are skipped, and a failure while reading the file is
// reported after the already-queued URLs have been processed.
func main() {
	file, err := os.Open("url.txt")
	if err != nil {
		fmt.Println("Failed to open file:", err)
		return
	}
	defer file.Close()

	// Buffered channel that hands URLs to the workers.
	urls := make(chan string, maxWorkers)

	// Start the worker goroutines that perform the requests.
	for i := 0; i < maxWorkers; i++ {
		go worker(urls)
	}

	// Feed every non-empty line to the workers.
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		url := scanner.Text()
		if url == "" {
			// An empty line can never be requested successfully.
			continue
		}
		wg.Add(1)
		urls <- url
	}
	close(urls)
	wg.Wait()

	// Scan returns false both at end of file and on error (for example a line
	// longer than the scanner's buffer); report the latter instead of silently
	// stopping early.
	if err := scanner.Err(); err != nil {
		fmt.Println("Failed to read file:", err)
	}
}

86
go/input_urls.txt Normal file
View File

@ -0,0 +1,86 @@
https://highp.ing
https://blogcdn.blog.highp.ing/p/2023zongjie
https://blogcdn.blog.highp.ing/p/GreatMusic
https://blogcdn.blog.highp.ing/p/ceping-nomaifreehost
https://blogcdn.blog.highp.ing/p/cf302
https://blogcdn.blog.highp.ing/p/dns
https://blogcdn.blog.highp.ing/p/jianshiweb
https://blogcdn.blog.highp.ing/p/nodepanels
https://blogcdn.blog.highp.ing/p/samopenai
https://blogcdn.blog.highp.ing/p/xui-scanner
https://blogcdn.blog.highp.ing/p/2024new
https://blogcdn.blog.highp.ing/p/Rclone-Fuse
https://blogcdn.blog.highp.ing/p/ceping-nomaius2
https://blogcdn.blog.highp.ing/p/chinamobilegamebooster
https://blogcdn.blog.highp.ing/p/fulltclash
https://blogcdn.blog.highp.ing/p/magisk-delta
https://blogcdn.blog.highp.ing/p/ourasn
https://blogcdn.blog.highp.ing/p/screen
https://blogcdn.blog.highp.ing/p/yidongyunshouji
https://blogcdn.blog.highp.ing/p/Alist
https://blogcdn.blog.highp.ing/p/ask-for-help
https://blogcdn.blog.highp.ing/p/ceping-poloukv6
https://blogcdn.blog.highp.ing/p/cloudflare-r2
https://blogcdn.blog.highp.ing/p/github-student-pack
https://blogcdn.blog.highp.ing/p/musicbot
https://blogcdn.blog.highp.ing/p/pgptogithub
https://blogcdn.blog.highp.ing/p/sqaipad
https://blogcdn.blog.highp.ing/p/AndroidInstallGoogle
https://blogcdn.blog.highp.ing/p/blogspeedup
https://blogcdn.blog.highp.ing/p/ceping-rackdog
https://blogcdn.blog.highp.ing/p/cloudflare-tunnel
https://blogcdn.blog.highp.ing/p/googlepaiming
https://blogcdn.blog.highp.ing/p/navidrome
https://blogcdn.blog.highp.ing/p/qqpupil
https://blogcdn.blog.highp.ing/p/vps-openvpn
https://blogcdn.blog.highp.ing/p/FreeLinkDomain
https://blogcdn.blog.highp.ing/p/ceping-499hkv6
https://blogcdn.blog.highp.ing/p/ceping-rackdogau
https://blogcdn.blog.highp.ing/p/dn42
https://blogcdn.blog.highp.ing/p/homecloud
https://blogcdn.blog.highp.ing/p/newlogo
https://blogcdn.blog.highp.ing/p/ruipingcloudflare
https://blogcdn.blog.highp.ing/p/writemarkdown
https://blog.highp.ing
https://blog.highp.ing/p/2023zongjie
https://blog.highp.ing/p/GreatMusic
https://blog.highp.ing/p/ceping-nomaifreehost
https://blog.highp.ing/p/cf302
https://blog.highp.ing/p/dns
https://blog.highp.ing/p/jianshiweb
https://blog.highp.ing/p/nodepanels
https://blog.highp.ing/p/samopenai
https://blog.highp.ing/p/xui-scanner
https://blog.highp.ing/p/2024new
https://blog.highp.ing/p/Rclone-Fuse
https://blog.highp.ing/p/ceping-nomaius2
https://blog.highp.ing/p/chinamobilegamebooster
https://blog.highp.ing/p/fulltclash
https://blog.highp.ing/p/magisk-delta
https://blog.highp.ing/p/ourasn
https://blog.highp.ing/p/screen
https://blog.highp.ing/p/yidongyunshouji
https://blog.highp.ing/p/Alist
https://blog.highp.ing/p/ask-for-help
https://blog.highp.ing/p/ceping-poloukv6
https://blog.highp.ing/p/cloudflare-r2
https://blog.highp.ing/p/github-student-pack
https://blog.highp.ing/p/musicbot
https://blog.highp.ing/p/pgptogithub
https://blog.highp.ing/p/sqaipad
https://blog.highp.ing/p/AndroidInstallGoogle
https://blog.highp.ing/p/blogspeedup
https://blog.highp.ing/p/ceping-rackdog
https://blog.highp.ing/p/cloudflare-tunnel
https://blog.highp.ing/p/googlepaiming
https://blog.highp.ing/p/navidrome
https://blog.highp.ing/p/qqpupil
https://blog.highp.ing/p/vps-openvpn
https://blog.highp.ing/p/FreeLinkDomain
https://blog.highp.ing/p/ceping-499hkv6
https://blog.highp.ing/p/ceping-rackdogau
https://blog.highp.ing/p/dn42
https://blog.highp.ing/p/homecloud
https://blog.highp.ing/p/newlogo
https://blog.highp.ing/p/ruipingcloudflare
https://blog.highp.ing/p/writemarkdown

BIN
go/main Executable file

Binary file not shown.

104
go/main.go Normal file
View File

@ -0,0 +1,104 @@
package main
import (
"bufio"
"fmt"
"net/http"
"os"
"strings"
"golang.org/x/net/html"
)
// main reads page URLs line by line from input_urls.txt, crawls each page with
// crawlAllURLs, and writes every discovered URL both to standard output and to
// all_urls.txt (one URL per line).
func main() {
	in, err := os.Open("input_urls.txt")
	if err != nil {
		fmt.Println("无法打开文件:", err)
		return
	}
	defer in.Close()

	// Create the output file once, before the loop. Creating it per input URL
	// truncates it every time, so only the links of the last page would
	// survive, and deferring Close inside the loop would keep one handle open
	// per input line until main returns.
	out, err := os.Create("all_urls.txt")
	if err != nil {
		fmt.Println("无法创建文件:", err)
		return
	}
	defer out.Close()

	scanner := bufio.NewScanner(in)
	for scanner.Scan() {
		inputURL := scanner.Text()
		for _, url := range crawlAllURLs(inputURL) {
			fmt.Println(url)
			if _, err := fmt.Fprintf(out, "%s\n", url); err != nil {
				fmt.Println("无法写入文件:", err)
				return
			}
		}
	}

	// Scan returns false on both end of file and read errors; surface errors.
	if err := scanner.Err(); err != nil {
		fmt.Println("无法读取文件:", err)
	}
}
// crawlAllURLs fetches the page at url and returns the distinct URLs it
// references: the href of every <a> tag and the src or href of every <img>,
// <link> and <script> tag. Each attribute value is passed through resolveURL
// together with the result of getBaseURL(url). The result comes from iterating
// a map, so its order is unspecified. If the page cannot be fetched, the error
// is printed and an empty slice is returned.
func crawlAllURLs(url string) []string {
	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("无法获取网页:", err)
		return []string{}
	}
	defer resp.Body.Close()

	base := getBaseURL(url)
	// seen is used as a set so that each URL is reported once.
	seen := make(map[string]struct{})

	z := html.NewTokenizer(resp.Body)
	// Next yields ErrorToken when the tokenizer stops (including at end of
	// input), which ends the scan.
	for z.Next() != html.ErrorToken {
		tok := z.Token()
		if tok.Type != html.StartTagToken && tok.Type != html.SelfClosingTagToken {
			continue
		}

		// Decide which attributes carry a link for this tag.
		var acceptSrc bool
		switch tok.Data {
		case "a":
			acceptSrc = false
		case "img", "link", "script":
			acceptSrc = true
		default:
			continue
		}

		for _, attr := range tok.Attr {
			if attr.Key == "href" || (acceptSrc && attr.Key == "src") {
				seen[resolveURL(base, attr.Val)] = struct{}{}
			}
		}
	}

	links := make([]string, 0, len(seen))
	for link := range seen {
		links = append(links, link)
	}
	return links
}
// getBaseURL returns the scheme-and-host prefix of url, e.g.
// "https://example.com" for "https://example.com/a/b". It works by splitting
// on "/" and joining the first and third pieces, so it expects the usual
// "scheme://host/..." form. If url contains fewer than two "/" characters
// there is no host piece to extract and url is returned unchanged instead of
// panicking on an out-of-range index.
func getBaseURL(url string) string {
	// Only the first three pieces are needed; limiting the split avoids
	// cutting up the path.
	parts := strings.SplitN(url, "/", 4)
	if len(parts) < 3 {
		return url
	}
	return parts[0] + "//" + parts[2]
}
// resolveURL turns a link found on a page into an absolute URL.
//
//   - Links starting with "http://" or "https://" are returned unchanged.
//   - Protocol-relative links ("//host/path") take the scheme of baseURL, and
//     fall back to "https:" when baseURL has no "scheme://" prefix.
//   - Links starting with "/" are appended to baseURL.
//   - Anything else is appended to baseURL after a "/".
//
// baseURL is expected to be a scheme-and-host prefix without a trailing slash,
// such as "https://example.com".
func resolveURL(baseURL, url string) string {
	switch {
	case strings.HasPrefix(url, "http://"), strings.HasPrefix(url, "https://"):
		return url
	case strings.HasPrefix(url, "//"):
		// A protocol-relative reference inherits the scheme of the page it
		// appears on rather than always being https.
		scheme, _, found := strings.Cut(baseURL, "//")
		if !found || !strings.HasSuffix(scheme, ":") {
			scheme = "https:"
		}
		return scheme + url
	case strings.HasPrefix(url, "/"):
		return baseURL + url
	default:
		return baseURL + "/" + url
	}
}

4612
go/url.txt Normal file

File diff suppressed because it is too large Load Diff