Pen Settings

HTML

CSS

CSS Base

Vendor Prefixing

Add External Stylesheets/Pens

Any URLs added here will be added as <link>s in order, and before the CSS in the editor. You can use the CSS from another Pen by using its URL and the proper URL extension.

+ add another resource

JavaScript

Babel includes JSX processing.

Add External Scripts/Pens

Any URLs added here will be added as <script>s in order, and run before the JavaScript in the editor. You can use the URL of any other Pen and it will include the JavaScript from that Pen.

+ add another resource

Packages

Add Packages

Search for and use JavaScript packages from npm here. By selecting a package, an import statement will be added to the top of the JavaScript editor for this package.

Behavior

Auto Save

If active, Pens will autosave every 30 seconds after being saved once.

Auto-Updating Preview

If enabled, the preview panel updates automatically as you code. If disabled, use the "Run" button to update.

Format on Save

If enabled, your code will be formatted when you actively save your Pen. Note: your code becomes un-folded during formatting.

Editor Settings

Code Indentation

Want to change your Syntax Highlighting theme, Fonts and more?

Visit your global Editor Settings.

HTML

              
                
              
            
!

CSS

              
                
              
            
!

JS

              
                /* universal (全体に適用する gulp plugin loadプラグイン対象をコメントアウト)
 ---------------------------------------------------------- */
var gulp   = require('gulp'); //gulp
var client = require('cheerio-httpcli');
var json2xls = require('json2xls');
var fs = require('fs-extra');


/* ----------------------------------------------------------
 function
 ---------------------------------------------------------- */
/* json2csv
 -----------------------------------------------------------*/
/**
 * Convert an array of flat record objects into a CSV string.
 *
 * The header row is the union of the records' own enumerable keys, in
 * first-seen order, and every data row is emitted in that same column order
 * so cells always line up with the header. A record that lacks a column
 * produces an empty cell. Rows are separated by "\n" and no trailing newline
 * is appended.
 *
 * Cells that contain a comma, a double quote, or a line break are wrapped in
 * double quotes with embedded quotes doubled (RFC 4180 style); null and
 * undefined become empty cells.
 *
 * @param {Array<Object>} json - Records to serialise.
 * @returns {string} CSV text, or an empty string when there are no records.
 */
function json2csv(json) {
    // Without at least one record there is no header to derive.
    if (!Array.isArray(json) || json.length === 0) {
        return '';
    }

    // Quote a single cell only when its content would otherwise break the row.
    function escapeCsvCell(value) {
        if (value === null || value === undefined) {
            return '';
        }
        var text = String(value);
        if (/[",\r\n]/.test(text)) {
            return '"' + text.replace(/"/g, '""') + '"';
        }
        return text;
    }

    // Collect the column names once; a prototype-less object keeps keys such
    // as "__proto__" from being mishandled by the membership check.
    var columns = [];
    var seen = Object.create(null);
    json.forEach(function (record) {
        Object.keys(record).forEach(function (key) {
            if (seen[key] !== true) {
                seen[key] = true;
                columns.push(key);
            }
        });
    });

    var header = columns.map(escapeCsvCell).join(',') + "\n";
    var body = json.map(function (record) {
        return columns.map(function (key) {
            return escapeCsvCell(record[key]);
        }).join(',');
    }).join("\n");

    return header + body;
}

/* dir の作成
 -----------------------------------------------------------*/
/**
 * Create the given directory, or each directory in the given array, by
 * passing every entry to fs.mkdirsSync.
 *
 * @param {string|Array<string>} path - One directory path or a list of them.
 */
var outputDir = function (path) {
    // Treat a single path as a one-element list so both cases share one loop.
    var targets = Array.isArray(path) ? path : [path];
    targets.forEach(function (dir) {
        fs.mkdirsSync(dir);
    });
};

/* ----------------------------------------------------------
 task
 ---------------------------------------------------------- */
/* scraping
 -----------------------------------------------------------*/
/**
 * Gulp task "scraping".
 *
 * Fetches every page in urlList from urlDomain, collects the elements under
 * searchRoot that match searchSelector, and writes one .xlsx file per page
 * into exportPath. Each row holds the element's tag name, class, text, src
 * attribute and inner HTML.
 *
 * The fetches are asynchronous, so the task takes gulp's completion callback
 * and calls it only after every URL has been handled. A failed fetch is
 * logged and skipped; it does not fail the task.
 *
 * @param {Function} done - Completion callback supplied by gulp.
 */
gulp.task('scraping', function(done) {

    // settings
    // -------------------------------- //
    // Root of the search: only descendants of <body> are inspected
    var searchRoot = 'body';
    // Selectors to collect, merged into one comma-separated selector list
    var searchSelector = [
        'h1', // element selectors
        'h2',
        'h3',
        '.ttl-headline', // class selectors
        '.txt-cmn'
    ].join(',');

    // Output directory for the Excel files (write this as a relative path)
    var exportPath = './dest/output_excel/';

    // Target URLs
    var urlDomain = 'http://localhost:3004';// origin that every path below is appended to
    var urlList = [
        '/index.html', // joined as urlDomain + urlList[0] >>> http://localhost:3004/index.html
        '/about/index.html',
        '/news/20180101.html',
        '/news/20180120.html',
        '/news/20180409.html',
        '/news/20180501.html',
        '/access/index.html'
    ];

    // processing
    // -------------------------------- //

    // Create the output directory
    outputDir(exportPath);

    // Count outstanding fetches so completion is signalled exactly once,
    // after the last callback has run.
    var remaining = urlList.length;
    if (remaining === 0) {
        done();
        return;
    }
    var pageFinished = function () {
        remaining -= 1;
        if (remaining === 0) {
            done();
        }
    };

    // Scrape each page
    urlList.forEach(function (value) {
        client.fetch(urlDomain + value, {}, function (err, $, res) {
            if(err){
                // Keep the error itself in the log so the cause is not lost
                console.log(urlDomain + value + ' のエラー', err);
                pageFinished();
                return;
            }

            console.log(urlDomain + value + ' を取得開始');
            var pageData = [];

            // NOTE(review): the inner selector is empty; this looks like a
            // placeholder for stripping unwanted child elements before
            // reading text. Its effect depends on the cheerio version in
            // use - confirm before changing.
            $(searchRoot).find(searchSelector).find('').remove();

            // Build one row per matched element
            $(searchRoot).find(searchSelector).each(function () {
                var el = $(this);
                pageData.push({
                  tag: el.prop('tagName'),
                  class: el.attr('class'),
                  text: el.text(),
                  src: el.attr('src'),
                  innerHtml: el.html()
                });
            });

            // Write the workbook; the file name is the URL path with the
            // leading slash dropped and remaining slashes turned into "_"
            var xls = json2xls(pageData);
            fs.writeFileSync(exportPath+value.replace(/^\//,'').replace(/\//g,'_')+'.xlsx', xls, 'binary');
            pageFinished();
        });
    });
});
              
            
!
999px

Console