* Quick arithmetic check: square root of 16
display sqrt(16)

* Load the example automobile dataset shipped with Stata, replacing the data in memory
sysuse auto, clear

* Show the variables stored in the dataset
describe

* One-way frequency table of the foreign indicator
tabulate foreign

* Mean price within each category of foreign, shown with the %12,1fc display format
tabstat price, statistics(mean) by(foreign) format(%12,1fc)

* Set the working directory (machine-specific path: adjust before running)
cd "C:\Users\ibtall\Documents"

* Look for the file and list every match; capture keeps the do-file running when no match exists
capture noisily findfile exportstata.ipynb, all

* fs is a community-contributed command: install it from SSC before its first use
capture which fs
if _rc {
    ssc install fs
}

fs *.ipynb // List the files matching the *.ipynb pattern

dir // List the files of the current directory

ls // Same as the dir command

pwd // Print the working directory

which regress // Show where the command is found and, for ado-files, its version

* capture: mkdir stops with an error when the folder already exists
capture noisily mkdir new_folder

sysdir // List the Stata system directories

sysuse dir // List the example datasets shipped with Stata

* capture: erase stops with an error when the file does not exist
capture noisily erase data.dta

// Record the session in a plain-text log file
capture log close // log using fails when a log is already open
log using myfile, text replace
/* log off  : suspend logging
   log on   : resume logging
   log close: stop logging and close the file */

* Help commands
help use
help search

* Install community-contributed packages from SSC
ssc install fs
* net install needs a from() source; NOTE(review): xtable is assumed to be hosted on SSC - confirm
ssc install xtable

* Load the city temperature example dataset shipped with Stata
sysuse citytemp, clear

use "Outputs\data.dta", clear // Load a Stata dataset from a specific path

* Read cells A1:C20 of sheet1; the first row holds the variable names, converted to lowercase
import excel "dt.xlsx", sheet("sheet1") cellrange(A1:C20) firstrow case("lower")

* Read rows 2-20 and columns 1-8, taking variable names from row 2.
* clear is required because the imported Excel data in memory are unsaved.
import delimited "dt.csv", rowrange(2:20) colrange(1:8) varname(2) clear

* Set the URL prefix used by webuse (the scheme separator :// was missing)
webuse set "https://www.ansd.sn/data"

webuse "data", clear // clear: the data in memory are unsaved at this point

save "mydata", nolabel replace orphans // orphans: also save value labels not attached to any variable

saveold "myolddata", version(12) replace nolabel // Save the current data in an older Stata format

export excel using mydata.xlsx, replace // Save the current data in Excel format

export delimited using mydata.csv, delimiter(",") replace // Save as comma-separated text

* Sort by region; stable keeps the previous order within equal values of region
sort region, stable

* Ascending division, then descending region
gsort +division -region

* Open the Variables Manager window
varmanage

* Display format for both temperature variables
* (other format types: %0#.#gc; %-#.#fc; %-#.#e; %-tc; %~#s; %-#s)
format %-8.2fc tempjuly tempjan

list heatdd if !missing(division) & inrange(region, 1, 3)

* Variables whose storage type is not byte
ds, not(type byte)
* Variables whose label matches "region", ignoring case
ds, has(varlabel "region") insensitive

* Search variable names and labels for a word
lookfor "region"

* Open the data browser on the first 20 observations (Ctrl+8)
browse in 1/20

count if inlist(region, 1, 2)

* Stop with an error unless every division value lies in 1..40
assert inrange(division, 1, 40)

describe region

* Codebook entry for division, with the dataset header and any attached notes
codebook division, notes header

* Per-region inspection of both temperature variables
bysort region: inspect tempjuly tempjan

by region, sort: summarize heatdd, meanonly

* Keep a random sample of 10 observations per region (10% when count is omitted)
sample 10, count by(region)

* Attach a note to region, then display the notes of region
notes region: senegal is not concerning
notes region

recast double region, force // Change the storage type of the variable

tostring region, generate(region_str) // String copy of region

destring region_str, replace force // Back to numeric; force turns non-numeric text into missing

decode division, generate(division_str) maxlength(20) // Value labels -> string variable

encode division_str, generate(division_bis) label(division) // label() names the value label to use

* decode has no label() option: it reads the value label attached to the variable
decode division_bis, generate(division_str2)

mvdecode division region, mv(99 88) // Replace the codes 99 and 88 by system missing (.)

* Replace missing values by 99 for the selected observations (regionn was a typo for region)
mvencode _all if region == 1 | division < 3, mv(99)

isid region // Check whether the variable uniquely identifies the observations

duplicates report region // Count the duplicated values of the variable

duplicates list region division, sepby(region) // List duplicates of the listed variables, separated by region

duplicates examples region // Show one example per group of duplicated values

duplicates tag region, generate(region_duplic) // New variable holding the number of duplicates of each observation

* NOTE(review): idvar is not created anywhere in this script - replace with the real key variable
duplicates drop idvar, force // Drop observations duplicated on the key variable

* Label the dataset in memory
label data "This data base is related to climate informations"

* Label one variable
label variable region "The region's names"

* Define a value label: codes and their text
label define mylab 1 "Hot" 2 "cold"

* Add a new code to the existing value label
label define mylab 3 "litle hot", replace add

* Change the text of an existing code
label define mylab 1 "very hot", replace modify

* Attach the value label to a variable
label values myvar mylab

* Names of the value labels in memory
label dir

* Names and contents of all value labels
label list

* Contents of one value label
label list mylab

* Copy mylab into mynewlab, overwriting mynewlab if it exists
label copy mylab mynewlab, replace

* Write every value label to a do-file, overwriting the file
label save using labdofile, replace

* Write only the value label mylab
label save mylab using labfile, replace

* Drop all value labels (specific names may be listed instead of _all)
label drop _all

* NOTE(review): v1, x1 and x2 are not created anywhere in this script - placeholder variables
recode v1 (3/5=0 "Value 0") (1/2=1 "Value 1"), gen(newv1) label(mylabel) // Recode v1 into newv1 with a new value label

recode x1 x2 (1=5) (2=4) (3=3) (4=2) (5=1), pre(n) test // Reverse the codes of x1 x2, storing results in nx1 nx2

levelsof region // Distinct values of a categorical variable

levelsof region, missing local(region_levels) // Store the levels, missing included, in a local macro

* The option limiting the number of listed mappings is list(#); labelbook has no limit() option
labelbook, list(20) problems detail // Value labels (at most 20 mappings each) and the variables using them

numlabel mylab, add mask("#.")    // Prefix labels with their code: "very hot" --> "1. very hot"

numlabel mylab, remove mask("#.") // Remove that prefix again

uselabel using labelbase, clear var // Dataset of value labels (specific label names may be listed)

label language // List the existing label languages

label language french, new // Create a new, empty label set and make it current

label language default // Switch back to the original label set

* copy is a suboption of new; it needs a name that does not exist yet
label language french_copy, new copy // New label set copied from the current one

label language french // Switch to the french label set defined above

label language eng, rename // Rename the current label set to eng

* The set named french was just renamed to eng, so delete the copy instead
label language french_copy, delete

* Rename a single variable
rename region myregion

* Rename several variables at once: old names in the first group, new names in the second
rename (myregion zone)(region newzone)

* elabel is a community-contributed package installed from SSC.
* NOTE(review): every name below (var1, lname, lnamelist, filename, ...) is a placeholder,
* and the remarks describe what the command names suggest - confirm against help elabel.
ssc install elabel

* Presumably labels var1 and var2 in one call
elabel variable (var1 var2)("label 1" "label 2")

elabel define lname 1 "lname 1" 2 lname2 // Presumably equivalent to label define

* Presumably attaches lbl1 to var1 and lbl2 to var2
elabel values (var1 var2)(lbl1 lbl2)

elabel dir, current // alternative option: nomemory

elabel list, current // alternative options: nomemory, varlist

elabel remove lnamelist, not // Presumably removes every value label except those listed

elabel drop lname // Presumably the same as label drop

elabel keep lname

elabel copy oldlname newlname // Presumably the same as label copy

elabel save lname using mylabel, replace

elabel compare lname1 lname2

elabel duplicates report

elabel duplicates drop

elabel duplicates retain

elabel load using filename, lname(lname) value(value) label(label)

elabel recode lname (1=3 3/7=7/3), define(newlname)

elabel recode lnamelist (2 = .a "Missimg"), dryrun

elabel rename (oldlnameslist)(newlnameslist), force

elabel rename oldlnames, upper // alternative options: lower, proper

* Flag observations whose heating degree days lie below the sample mean.
* The storage type is byte (not bytes), and generate has no mean() function,
* so the mean is taken from the results of summarize.
quietly summarize heatdd, meanonly
generate byte zone = heatdd < r(mean) if !missing(heatdd)

* NOTE(review): age is not created anywhere in this script - placeholder variable
generate agecat = autocode(age,4,18,65) // 4 equal-width groups between 18 and 65

* Named agecat2 because agecat already exists at this point
generate byte agecat2 = recode(age,21,38,64,75) // Groups by upper bound: 21, 38, 64, 75

egen myv_count = anycount(division region), values(1 2 3) // Number of listed variables equal to one of the values

egen myv_match = anymatch(division region), values(1 2 3) // 1 if any listed variable equals one of the values, else 0

egen myv_vlues = anyvalue(division), values(1 2 3) // Value of division when it is 1, 2 or 3, missing otherwise

egen myv_concat = concat(division region), punct("") // Other options: format(%9s) decode maxlength(10)

egen myv_nbnonmiss = count(heatdd), by(division region) // Number of nonmissing heatdd per group

egen tempjanclass = cut(tempjan), at(2(10)73) label // Add icodes to store 0, 1, 2, ... instead of the left bounds

egen tempjanclass2 = cut(tempjan), group(5) // 5 groups of roughly equal frequency

egen myv_diff = diff(division region) // 1 if division differs from region

egen myv_sub = ends(division_str), punct(" ") trim last // Last word; head or tail may replace last; trim strips outer spaces

egen myn_fill = fill(11 13 15 17 19 21 23 27) // Continue the listed pattern over the remaining observations

egen myn_group = group(division_str region), missing label truncate(5) // Group identifier with an automatic value label

* Same grouping with an explicit value label name; a distinct variable name is needed
* because myn_group already exists
egen myn_group2 = group(division_str region), missing lname(grouplbl) truncate(5)

egen myv_iqr = iqr(tempjuly+tempjan), by(division region) // Interquartile range

egen myv_pctile = pctile(tempjuly+tempjan), by(division region) p(25) // 25th percentile

egen myn_kurt = kurt(heatdd), by(division region) // Kurtosis of heatdd

egen myn_skew = skew(heatdd), by(division region) // Skewness of heatdd

egen myv_mad = mad(tempjuly+tempjan), by(division region) // Median absolute deviation from the median

egen myv_max = max(tempjuly+tempjan), by(division region)

egen myv_mdev = mdev(tempjuly+tempjan), by(division region) // Mean absolute deviation from the mean

egen myv_mean = mean(tempjuly+tempjan), by(division region)

egen myn_median = median(tempjuly+tempjan), by(division region)

egen myv_min = min(tempjuly+tempjan), by(division region)

* The egen function is mode(), not mod()
egen myv_mod = mode(tempjan), by(division_str region) // Most common January temperature

egen myv_pc = pc(tempjuly+tempjan), by(division region) // Add the prop option for proportions instead of percentages

egen myv_rank = rank(tempjuly+tempjan), by(division region) unique // Rank within group; ties broken arbitrarily

* Row-wise functions: rowfirst(), rowlast(), rowmax(), rowmean(), rowmedian(), rowpctile() [, p(#)], rowmin(),
* row[non]miss() == number of [non]missing values, rowsd(), rowtotal()
egen myv_nomiss = rownonmiss(tempjuly tempjan division_str), strok // strok allows string variables in the list

egen myv_tot = rowtotal(tempjuly tempjan), missing // Missing instead of 0 when all are missing; see also total()

egen myv_sd = sd(tempjuly+tempjan), by(division region) // Standard deviation

egen myv_sep = seq(), from(2) to(90) block(7) by(region division) // Sequence of integers within group

egen myv_std = std(tempjuly+tempjan), mean(10) std(2) // Standardized to mean 10 and standard deviation 2

egen myv_tag = tag(division region) // Tags one observation per group; add missing to include missing values

matrix m = (2,3,4)   // Vector of means

matrix s = (5,10,20) // Vector of standard deviations

drawnorm v_x v_y v_z, means(m) sds(s) // Three normally distributed variables

* inrange() takes exactly three arguments (value, lower, upper);
* membership in a list of values is tested with inlist()
separate tempjuly, by(inlist(region, 1, 2, 3) & tempjan > 10) generate(newtp) shortlabel // Add sequential to number the new variables 1, 2, 3...

pctile myv_decil = tempjuly, nquantiles(10) genp(percentdeci) // Two variables: the deciles and their percentages

xtile myv_xtile = tempjuly, nquantiles(10)      // Decile category of each observation

xtile myv_xtilcut = tempjuly, cutpoints(region) // Categories using the values of region as cutpoints

range new_square 0 7*_pi 300 // New variable running from 0 to 7*_pi over 300 observations

* NOTE(review): data, ID, varlist, id, year, inc and ue are placeholders for real files and variables
append using data // Add the observations of the using file below the current ones

merge 1:1 ID using data, noreport keepusing(varlist) generate(linkvar) // Merge on the key ID, one observation per ID on both sides

* Each merge below names its own result variable: a second merge cannot recreate _merge
merge m:1 ID using data, generate(link_m1) // Several observations of the current data share the same ID

merge 1:m ID using data, generate(link_1m) // Several observations of the using data share the same ID

* Sequential merge: the syntax is 1:1 _n, matching observations by position rather than by key
merge 1:1 _n using data, generate(link_seq)

set obs 20 // Raise the number of observations to 20

insobs 10, after(2) // Insert 10 new observations after the 2nd observation

expand 2, generate(type) // Duplicate each observation; type is 0 for originals and 1 for copies

order tempjuly tempjan, after(region) // Move the two variables right after region

reshape long inc@r ue, i(id) j(year) // Wide to long

reshape wide inc@r ue, i(id) j(year) // Long to wide

reshape error // List the problem observations of the last reshape

xpose, clear varname format(%6.2f) // Transpose the dataset: observations become variables

describe, simple // Variable names only

summarize

sumstats // Another summary command; NOTE(review): not an official Stata command - confirm it is installed

* collapse, statsby without saving(), and contract each replace the data in memory,
* so every one of them runs inside its own preserve/restore pair.
preserve // Save a copy of the data in memory
collapse (mean) mheatdd=heatdd (count) nbcooldd=cooldd, by(division region)
restore // Bring back the copy saved by preserve

preserve
statsby vmean = r(mean) vsd = r(sd), basepop(region < 4) by(region) total nodots verbose clear: summarize tempjuly, detail
restore

* With saving() the results go to a file and the data in memory stay untouched (templan was a typo for tempjan)
statsby _b _se, basepop(inlist(region, 1,2)) by(region) saving(restemp, replace) total nodots verbose: regress tempjuly tempjan

preserve
contract tempjuly tempjan, freq(fvar) percent(pvar) float format(%9.2f) nomiss // Dataset of frequencies
restore

* compare has no by() option; groups are handled with the by prefix
by region, sort: compare tempjuly tempjan // Differences between the two variables

* abbrev() is documented for lengths 5 to 32, so 5 is the shortest abbreviation
gen division_str2 = abbrev(division_str, 5) // Longer names such as Mountain and Pacific get abbreviated

gen indregionville = indexnot(division_str2, region_str) // Position of the first character of division_str2 not found in region_str

* The function is plural(), not plurial()
gen plusregion = plural(2, region_str, "+es") // Plural form when the first argument is not 1 or -1; "+es" appends es

gen logicmatch = ustrregexm(division_str2, region_str, 1) // 1 if the regular expression matches (case-insensitive), else 0

* divion_str was a typo for division_str in the next two commands
gen fisrt_occ = ustrregexrf(division_str, region_str, "oui", 1) // Replace the first match of region_str by oui

gen all_occ = ustrregexra(division_str, region_str, "ouiall", 1) // Replace every match of region_str by ouiall

gen nospace_div = stritrim(division_str) // Collapse multiple internal spaces into one

gen divlen = ustrlen(division_str) // Number of Unicode characters in division_str

gen lowerdiv = ustrlower(division_str, "fr") // Lowercase with the French locale; see also ustrupper(division_str, "fr")

gen left_trim = ustrltrim(division_str) // Remove leading spaces; ustrrtrim(division_str) removes trailing ones

split region_str, generate(newreg) parse(" ") limit(3) destring ignore("/") float percent /* Split the string
            at the parse characters into at most 3 new variables, convert them to numeric (float),
            and read percentages as fractions */

* One-way table; the option restricting the sample is subpop(), not subop()
tabulate division, generate(division_) missing nolabel sort nofreq subpop(region) plot matcell(freqstore)

* Two-way table with tests and association measures; the option for Cramer's V is an uppercase V
tabulate division region, chi2 lrchi2 cchi2 clrchi2 exact gamma taub V column row rowsort colsort nofreq nolabel ///
 cell expected missing

* all is only available for two-way tables
tabulate division region, all // Equivalent to specifying chi2 lrchi2 V gamma taub

tab1 division region, sort // One-way tables for several variables

tabulate division region, summarize(heatdd) nomean nostandard nofreq nolabel noobs wrap missing

tab2 division region zone, row nofreq // Two-way tables for every pair of the listed variables

* Statistics accepted by contents(): freq, mean, sd, semean, sebinomial, sepoisson, sum, rawsum, count, n, max, min, median, iqr, pn
* NOTE(review): this looks like the table syntax used before Stata 17 - confirm the Stata version in use
table division region, by(zone) contents( mean heatdd) center left row column ///
  scolumn concise missing replace format(%9.0g) cellwidth(9) csepwidth(9) scsepwidth(9) stubwidth(9)

tabstat heatdd, by(division) statistics(mean) format(%9.2fc) save // save stores the results in r()

* ir needs a variable list, so the bare command stopped the do-file; open the epitab help instead
help epitab

graph bar cooldd if region == 4 & division > 5, over(zone) over(region) over(division)

graph box heatdd cooldd, over(region)

graph dot (mean) cooldd, over(division)

* Each pie chart gets its own name in memory so the second does not overwrite the first
graph pie cooldd, over(division) plabel(_all percent) name(divgrp, replace)

graph save divgrp "divgrp", replace // Write the named memory graph to divgrp.gph

graph pie cooldd, over(region) plabel(_all percent) name(reggraph, replace)

graph save reggraph "reggraph", replace // Write the named memory graph to reggraph.gph

* graph rename works on graphs in memory, not on saved files
graph rename reggraph reggrp, replace

* Unquoted names refer to graphs in memory
graph combine divgrp reggrp

graph export my2grp, as(png) width(600) height(450) replace // Export the combined graph as PNG

* Numeric scalars
scalar define a = 1

* Scalars can be used in expressions
scalar define b = a + 3

display scalar(b)

* String scalars
scalar define txt = "Je m'appelle"

scalar define txt = txt + " Ibrahima TALL"

display scalar(txt)

* List all scalars in memory (dir and list are synonyms)
scalar list

scalar dir

* Drop every scalar in memory
scalar drop _all

* Drop the local macro name if it exists; local macros carry a leading underscore in macro drop.
* (local drop name would instead define a local called drop containing the text name.)
capture macro drop _name

* A local macro exists only within the program or do-file that defines it
local name Tall and mee
di "`name'"

local i = 1 // With the equal sign, the expression on the right is evaluated

local tp: type tempjuly            // Storage type of the variable

local lbl: variable label tempjuly // Variable label

* NOTE(review): myvar and myvarlab are not created anywhere in this script - placeholder names
local vlblname: value label myvar  // Name of the value label attached to myvar

local label1 : label (myvar) 1     // Text of value 1 in the value label attached to myvar

local label2 : label myvarlab 2    // Text of value 2 in the value label myvarlab

di "`: type tempjuly'" // Extended macro functions can also be used inline

local cmdprop: properties help // Properties of a command

di "`cmdprop'"

quietly tab region division, nofreq row // Run a command to inspect its stored results

local rescom: r(scalars) // Names of the scalars in r(); macros, matrices and functions work the same way

di "`rescom'"

local vsort: sortedby // Variables by which the dataset is sorted

di "`vsort'"

global nom monpere // Global macro
di "$nom"

macro dir // List the defined macros

macro list // Same as above

* tokenize stores the words of the list in the local macros 1, 2, 3, ...
* macro shift then moves 2 into 1, 3 into 2, and so on.
local vlist moi et toi

tokenize `vlist'
while "`1'" != "" {
    display "`1'"
    macro shift
}

* foreach over a list of values
foreach x in 1 2 3 {
    display "`x'"
}

* while with an in-place increment; ++i, i++, --i and i-- are allowed inside the macro quotes
local i = 1
while (`++i' < 5) {
    * Arithmetic cannot be written inside the macro quotes: compute the product first
    local doubled = `i' * 2
    display "`doubled'"
}

help program

capture program drop talprog
* talprog: count the matching character pairs between two string variables.
*
* Syntax : talprog strvar1 strvar2 [if] [in] [, generate(newvar)]
* For every observation in the sample, each character of strvar1 is compared with
* each character of strvar2, and every equal pair counts as one match.
* Returns: r(nb) = total number of matching pairs over the sample.
* generate(newvar) additionally stores the per-observation count in newvar.
* NOTE(review): =exp, weights, by() and mult() are still parsed so that existing
* calls keep working, but they do not affect the result - confirm intended meaning.
program define talprog, rclass
    version 9.1
    syntax varlist(min=2 max=2 string) [=exp] [if] [in] [iweight], [by(varlist) GENerate(name) mult(real 1)]

    * The two variable names come from the parsed varlist, not from the raw command line
    tokenize `varlist'
    local x `1'
    local y `2'

    * strok: string variables are allowed when marking the estimation sample
    marksample touse, strok

    if "`generate'" != "" {
        confirm new variable `generate'
    }

    tempvar lenx leny matches
    quietly {
        generate long `lenx' = strlen(`x') if `touse'
        generate long `leny' = strlen(`y') if `touse'
        generate long `matches' = 0 if `touse'

        * Loop bounds: the longest string of each variable within the sample
        summarize `lenx', meanonly
        local maxx = cond(r(N) > 0, r(max), 0)
        summarize `leny', meanonly
        local maxy = cond(r(N) > 0, r(max), 0)

        forvalues i = 1/`maxx' {
            forvalues j = 1/`maxy' {
                * The length guards stop positions past the end of a string
                * (where substr() returns "") from counting as matches
                replace `matches' = `matches' + 1              ///
                    if `touse' & `i' <= `lenx' & `j' <= `leny' ///
                    & substr(`x', `i', 1) == substr(`y', `j', 1)
            }
        }
        summarize `matches', meanonly
    }
    local nb = cond(r(N) > 0, r(sum), 0)

    if "`generate'" != "" {
        quietly generate long `generate' = `matches'
    }

    display as text "Matching character pairs: " as result `nb'
    return scalar nb = `nb'
end
* Call the program above: it requires exactly two string variables,
* built here from the labelled variables of the citytemp example data
sysuse citytemp, clear
decode division, generate(div_txt)
decode region, generate(reg_txt)
talprog div_txt reg_txt
return list

viewsource ml.ado // Look at the source code of an ado-program
help marksample

* Moved before exit: nothing placed after exit is executed
help fvset

clear
exit

STATA resources for data processing and analysis...

IBRAHIMA TALL

I. Data loading

I.4 Managing directory

I.1 Log using to save the work

I.2 Looking for help and research

I.3 Installing stata packages

I.5 Loading, saving and exporting data

II. Data treatment and wrangling

II.1 Looking at the data

II.2 Changing variable types and managing duplicate values

II.3 Managing labels and renaming variables

II.4 Creating variables

II.8 Combining datasets and arranging variables

II.9 Summarizing variables

III. Working with strings in STATA

IV. Tables and graphs

V. Programming resources